diff --git a/.gitignore b/.gitignore index 0ad1b338..eb18aeef 100644 --- a/.gitignore +++ b/.gitignore @@ -57,6 +57,7 @@ autoconf/autom4te.cache .zed # pythonenv for github Codespaces pythonenv* +seeds # clangd index. (".clangd" is a config file now, thus trailing slash) .clangd/ .cache diff --git a/example_participant_pattern_test.yaml b/example_participant_pattern_test.yaml new file mode 100644 index 00000000..52631e40 --- /dev/null +++ b/example_participant_pattern_test.yaml @@ -0,0 +1,46 @@ +# Example test case using BufferParticipantPattern rule for WaveParticipantBitTracking +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [8, 1, 1] # 8 threads +Buffers: + # Actual output from the shader (may have patterns in any order) + - Name: ParticipantOutput + Format: UInt32 + ZeroInitSize: 96 # Space for multiple patterns (3 uint32 per pattern) + + # Expected patterns - order doesn't matter, but pattern counts must match + - Name: ExpectedPatterns + Format: UInt32 + Data: [ + # Pattern 1: Wave op ID 69, loop iteration 0, participants 0,1,2,3 + 4416, 0x000F, 0x0000, # (69<<6)|0, mask for threads 0-3, high mask + 4416, 0x000F, 0x0000, # Duplicate 1 + 4416, 0x000F, 0x0000, # Duplicate 2 + 4416, 0x000F, 0x0000, # Duplicate 3 (4 participants = 4 copies) + + # Pattern 2: Wave op ID 70, loop iteration 1, participants 4,5,6,7 + 4496, 0x00F0, 0x0000, # (70<<6)|(1<<4), mask for threads 4-7, high mask + 4496, 0x00F0, 0x0000, # Duplicate 1 + 4496, 0x00F0, 0x0000, # Duplicate 2 + 4496, 0x00F0, 0x0000, # Duplicate 3 (4 participants = 4 copies) + ] + +Results: + - Result: ValidateParticipantPatterns + Rule: BufferParticipantPattern + GroupSize: 3 # Each pattern consists of 3 uint32 values + Actual: ParticipantOutput + Expected: ExpectedPatterns + +DescriptorSets: + - Resources: + - Name: ParticipantOutput + Kind: RWBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 +... 
\ No newline at end of file diff --git a/include/Support/Pipeline.h b/include/Support/Pipeline.h index a2f0ded2..7c497bf3 100644 --- a/include/Support/Pipeline.h +++ b/include/Support/Pipeline.h @@ -25,7 +25,7 @@ namespace offloadtest { enum class Stages { Compute }; -enum class Rule { BufferExact, BufferFloatULP, BufferFloatEpsilon }; +enum class Rule { BufferExact, BufferFloatULP, BufferFloatEpsilon, BufferParticipantPattern }; enum class DenormMode { Any, FTZ, Preserve }; @@ -131,6 +131,7 @@ struct Result { DenormMode DM = DenormMode::Any; unsigned ULPT; // ULP Tolerance double Epsilon; + unsigned GroupSize = 0; // For BufferParticipantPattern rule }; struct Resource { @@ -357,6 +358,7 @@ template <> struct ScalarEnumerationTraits { ENUM_CASE(BufferExact); ENUM_CASE(BufferFloatULP); ENUM_CASE(BufferFloatEpsilon); + ENUM_CASE(BufferParticipantPattern); #undef ENUM_CASE } }; diff --git a/lib/Support/Check.cpp b/lib/Support/Check.cpp index 640a4bf4..c969d526 100644 --- a/lib/Support/Check.cpp +++ b/lib/Support/Check.cpp @@ -16,7 +16,9 @@ #include "llvm/Support/Error.h" #include "llvm/Support/raw_ostream.h" #include +#include #include +#include constexpr uint16_t Float16BitSign = 0x8000; constexpr uint16_t Float16BitExp = 0x7c00; @@ -277,6 +279,127 @@ static bool testBufferFloatULP(offloadtest::Buffer *B1, offloadtest::Buffer *B2, return false; } +static bool testBufferParticipantPattern(offloadtest::Buffer *B1, + offloadtest::Buffer *B2, + unsigned GroupSize, + std::string &ErrorMsg) { + // Expect 3 x uint32_t: (combinedId, maskLow, maskHigh) + if (GroupSize == 0) { + ErrorMsg = "Invalid GroupSize (must be > 0)"; + return false; + } + + // Basic structural checks similar to testBufferExact + if (B1->ArraySize != B2->ArraySize || B1->size() != B2->size()) { + ErrorMsg = "Mismatched buffer shape (ArraySize or per-chunk size differs)"; + return false; + } + + // We operate on 32-bit words + if ((B1->size() % sizeof(uint32_t)) != 0) { + ErrorMsg = "Chunk size is not a 
multiple of 4 bytes"; + return false; + } + if ((B2->size() % sizeof(uint32_t)) != 0) { + ErrorMsg = "Expected chunk size is not a multiple of 4 bytes"; + return false; + } + + const uint32_t WordsPerChunk = + static_cast(B1->size() / sizeof(uint32_t)); + if (WordsPerChunk % GroupSize != 0) { + ErrorMsg = "Words per chunk must be a multiple of GroupSize"; + return false; + } + + using PatternTuple = std::tuple; + std::map ActualPatterns; + std::map ExpectedPatterns; + + auto ReadU32 = [](const char *Base, uint32_t WordIndex) -> uint32_t { + uint32_t V; + std::memcpy(&V, Base + WordIndex * sizeof(uint32_t), sizeof(uint32_t)); + return V; + }; + + // Accumulate patterns from all chunks + auto *B1It = B1->Data.begin(); + auto *B2It = B2->Data.begin(); + for (; B1It != B1->Data.end() && B2It != B2->Data.end(); ++B1It, ++B2It) { + const char *ABuf = B1It->get(); // unique_ptr -> char* + const char *EBuf = B2It->get(); + + for (uint32_t I = 0; I + GroupSize <= WordsPerChunk; I += GroupSize) { + if (GroupSize == 3) { + // Actual + const PatternTuple Ap(ReadU32(ABuf, I + 0), ReadU32(ABuf, I + 1), + ReadU32(ABuf, I + 2)); + ++ActualPatterns[Ap]; + + // Expected + const PatternTuple Ep(ReadU32(EBuf, I + 0), ReadU32(EBuf, I + 1), + ReadU32(EBuf, I + 2)); + ++ExpectedPatterns[Ep]; + } else { + // If you plan to support other group sizes later, handle here. 
+ } + } + } + + // Compare pattern multisets + std::stringstream Ss; + bool HasError = false; + + if (ActualPatterns.size() != ExpectedPatterns.size()) { + Ss << "Pattern kind count mismatch: actual has " << ActualPatterns.size() + << " unique patterns, expected has " << ExpectedPatterns.size() + << " unique patterns\n"; + HasError = true; + } + + // Missing / count-mismatched patterns + for (const auto &[pattern, expCount] : ExpectedPatterns) { + auto It = ActualPatterns.find(pattern); + if (It == ActualPatterns.end()) { + if (!HasError) + Ss << "Pattern differences found:\n"; + HasError = true; + Ss << " Missing pattern (combineId=" << std::get<0>(pattern) + << ", maskLow=0x" << std::hex << std::get<1>(pattern) + << ", maskHigh=0x" << std::get<2>(pattern) << std::dec + << ") - expected count: " << expCount << ", actual count: 0\n"; + } else if (It->second != expCount) { + if (!HasError) + Ss << "Pattern differences found:\n"; + HasError = true; + Ss << " Pattern (combineId=" << std::get<0>(pattern) << ", maskLow=0x" + << std::hex << std::get<1>(pattern) << ", maskHigh=0x" + << std::get<2>(pattern) << std::dec + << ") - expected count: " << expCount + << ", actual count: " << It->second << "\n"; + } + } + + // Unexpected patterns + for (const auto &[pattern, actCount] : ActualPatterns) { + if (ExpectedPatterns.find(pattern) == ExpectedPatterns.end()) { + if (!HasError) + Ss << "Pattern differences found:\n"; + HasError = true; + Ss << " Unexpected pattern (combineId=" << std::get<0>(pattern) + << ", maskLow=0x" << std::hex << std::get<1>(pattern) + << ", maskHigh=0x" << std::get<2>(pattern) << std::dec + << ") - expected count: 0, actual count: " << actCount << "\n"; + } + } + + if (HasError) { + ErrorMsg = Ss.str(); + return false; + } + return true; +} + template static std::string bitPatternAsHex64(const T &Val, offloadtest::Rule ComparisonRule) { @@ -391,10 +514,15 @@ llvm::Error verifyResult(offloadtest::Result R) { case offloadtest::Rule::BufferFloatEpsilon: 
{ if (testBufferFloatEpsilon(R.ActualPtr, R.ExpectedPtr, R.Epsilon, R.DM)) return llvm::Error::success(); - - std::ostringstream Oss; - Oss << std::defaultfloat << R.Epsilon; - OS << "Comparison Rule: BufferFloatEpsilon\nEpsilon: " << Oss.str() << "\n"; + break; + } + case offloadtest::Rule::BufferParticipantPattern: { + std::string ErrorMsg; + if (testBufferParticipantPattern(R.ActualPtr, R.ExpectedPtr, R.GroupSize, + ErrorMsg)) + return llvm::Error::success(); + // Return error with detailed message + OS << "Comparison Rule: BufferParticipantPattern\n" << ErrorMsg << "\n"; break; } } diff --git a/lib/Support/Pipeline.cpp b/lib/Support/Pipeline.cpp index 4c5f3db0..eba22811 100644 --- a/lib/Support/Pipeline.cpp +++ b/lib/Support/Pipeline.cpp @@ -108,13 +108,12 @@ void MappingTraits::mapping( template static void setData(IO &I, offloadtest::Buffer &B) { if (I.outputting()) { if (B.ArraySize == 1) { - // single buffer output llvm::MutableArrayRef Arr(reinterpret_cast(B.Data.back().get()), B.Size / sizeof(T)); I.mapRequired("Data", Arr); } else { - // array of buffers output llvm::SmallVector> Arrays; + Arrays.reserve(B.ArraySize); for (const auto &D : B.Data) Arrays.emplace_back(reinterpret_cast(D.get()), B.Size / sizeof(T)); I.mapRequired("Data", Arrays); @@ -122,14 +121,34 @@ template static void setData(IO &I, offloadtest::Buffer &B) { return; } - // zero-initialized buffer(s) - int64_t ZeroInitSize; + int64_t ZeroInitSize = 0; + int64_t SizeElems = 0; + std::optional Fill; I.mapOptional("ZeroInitSize", ZeroInitSize, 0); + I.mapOptional("Fill", Fill); + I.mapOptional("Size", SizeElems, 0); + if (ZeroInitSize > 0) { B.Size = ZeroInitSize; - for (uint32_t I = 0; I < B.ArraySize; I++) { + B.Data.clear(); + for (uint32_t Idx = 0; Idx < B.ArraySize; ++Idx) { B.Data.push_back(std::make_unique(B.Size)); - memset(B.Data.back().get(), 0, B.Size); + std::memset(B.Data.back().get(), 0, B.Size); + } + return; + } + + if (Fill.has_value()) { + if (SizeElems == 0) { + 
I.setError("'Size' must be provided when using 'Fill'"); + return; + } + B.Size = SizeElems * sizeof(T); + B.Data.clear(); + for (uint32_t Idx = 0; Idx < B.ArraySize; ++Idx) { + B.Data.push_back(std::make_unique(B.Size)); + std::fill_n(reinterpret_cast(B.Data.back().get()), SizeElems, + Fill.value()); } return; } @@ -139,29 +158,43 @@ template static void setData(IO &I, offloadtest::Buffer &B) { llvm::SmallVector Arr; I.mapRequired("Data", Arr); B.Size = Arr.size() * sizeof(T); + B.Data.clear(); B.Data.push_back(std::make_unique(B.Size)); - memcpy(B.Data.back().get(), Arr.data(), B.Size); + std::memcpy(B.Data.back().get(), Arr.data(), B.Size); return; } // array of buffers input llvm::SmallVector> Arrays; I.mapRequired("Data", Arrays); - B.Size = Arrays.back().size() * sizeof(T); - uint32_t ActualSize = 0; - for (auto Arr : Arrays) { + if (Arrays.size() != B.ArraySize) { + I.setError(llvm::Twine("Expected ") + std::to_string(B.ArraySize) + + " buffers, found " + std::to_string(Arrays.size())); + return; + } + + if (Arrays.empty()) { + B.Size = 0; + B.Data.clear(); + for (uint32_t Idx = 0; Idx < B.ArraySize; ++Idx) + B.Data.push_back(std::make_unique(0)); + return; + } + + B.Size = Arrays.front().size() * sizeof(T); + for (const auto &Arr : Arrays) { if (Arr.size() * sizeof(T) != B.Size) { I.setError("All buffers must have the same size."); return; } + } + + B.Data.clear(); + for (const auto &Arr : Arrays) { B.Data.push_back(std::make_unique(B.Size)); - memcpy(B.Data.back().get(), Arr.data(), B.Size); - ActualSize++; + std::memcpy(B.Data.back().get(), Arr.data(), B.Size); } - if (ActualSize != B.ArraySize) - I.setError(Twine("Expected ") + std::to_string(B.ArraySize) + - " buffers, found " + std::to_string(ActualSize)); } // Counter(s) can contain one counter value for a singular resource @@ -347,6 +380,10 @@ void MappingTraits::mapping(IO &I, I.mapOptional("DenormMode", R.DM); break; } + case Rule::BufferParticipantPattern: { + I.mapRequired("GroupSize", 
R.GroupSize); + break; + } default: break; } diff --git a/test/Basic/mem_conv_atomic_device.test b/test/Basic/mem_conv_atomic_device.test new file mode 100644 index 00000000..89750862 --- /dev/null +++ b/test/Basic/mem_conv_atomic_device.test @@ -0,0 +1,62 @@ +#--- source.hlsl +RWStructuredBuffer write_val : register(u0); +RWStructuredBuffer buf : register(u1); + +[numthreads(256,1,1)] +void main(uint3 TID : SV_DispatchThreadID) { + uint tid = TID.x; + uint temp; + InterlockedExchange(write_val[0], tid, temp); + uint read_val; + InterlockedAdd(write_val[0], 0, read_val); + // Check if all threads in the wave read the same value + buf[tid] = uint(WaveActiveAllEqual(read_val)); +} +//--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [6553, 1, 1] +Buffers: + - Name: write_val + Format: UInt32 + Stride: 4 + Data: [0] + - Name: buf + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1677568 + - Name: expected + Format: UInt32 + Stride: 4 + Fill: 1 + Size: 1677568 +Results: + - Result: Test1 + Rule: BufferExact + Actual: buf + Expected: expected +DescriptorSets: + - Resources: + - Name: write_val + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: buf + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o \ No newline at end of file diff --git a/test/Basic/mem_conv_atomic_group.test b/test/Basic/mem_conv_atomic_group.test new file mode 100644 index 00000000..6ccbf6b7 --- /dev/null +++ b/test/Basic/mem_conv_atomic_group.test @@ -0,0 +1,53 @@ +#--- source.hlsl +RWStructuredBuffer buf : register(u0); +groupshared uint loc; + +[numthreads(256,1,1)] +void main(uint3 TID : SV_DispatchThreadID) { + uint temp; + loc = 0; + GroupMemoryBarrierWithGroupSync(); + uint tid = TID.x; + InterlockedExchange(loc, tid, temp); + uint read_val; + InterlockedAdd(loc, 0, read_val); + // Check if all threads in the wave read the same value + buf[tid] = uint(WaveActiveAllEqual(read_val)); +} +//--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [6553, 1, 1] +Buffers: + - Name: buf + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1677568 + - Name: expected + Format: UInt32 + Stride: 4 + Fill: 1 + Size: 1677568 +Results: + - Result: Test1 + Rule: BufferExact + Actual: buf + Expected: expected +DescriptorSets: + - Resources: + - Name: buf + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o \ No newline at end of file diff --git a/test/Basic/mem_conv_device.test b/test/Basic/mem_conv_device.test new file mode 100644 index 00000000..5c807b11 --- /dev/null +++ b/test/Basic/mem_conv_device.test @@ -0,0 +1,61 @@ +#--- source.hlsl +RWStructuredBuffer write_val : register(u0); +RWStructuredBuffer buf : register(u1); + +[numthreads(256,1,1)] +void main(uint3 TID : SV_DispatchThreadID) { + uint tid = TID.x; + uint temp; + InterlockedExchange(write_val[0], tid, temp); + uint read_val = write_val[0]; + // Check if all threads in the wave read the same value + buf[tid] = uint(WaveActiveAllEqual(read_val)); +} +//--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [6553, 1, 1] +Buffers: + - Name: write_val + Format: UInt32 + Stride: 4 + Data: [0] + - Name: buf + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1677568 + - Name: expected + Format: UInt32 + Stride: 4 + Fill: 1 + Size: 1677568 +Results: + - Result: Test1 + Rule: BufferExact + Actual: buf + Expected: expected +DescriptorSets: + - Resources: + - Name: write_val + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: buf + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o \ No newline at end of file diff --git a/test/Basic/mem_conv_group.test b/test/Basic/mem_conv_group.test new file mode 100644 index 00000000..7b852e05 --- /dev/null +++ b/test/Basic/mem_conv_group.test @@ -0,0 +1,52 @@ +#--- source.hlsl +RWStructuredBuffer buf : register(u0); +groupshared uint loc; + +[numthreads(256,1,1)] +void main(uint3 TID : SV_DispatchThreadID) { + uint temp; + loc = 0; + GroupMemoryBarrierWithGroupSync(); + uint tid = TID.x; + InterlockedExchange(loc, tid, temp); + uint read_val = loc; + // Check if all threads in the wave read the same value + buf[tid] = uint(WaveActiveAllEqual(read_val)); +} +//--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [6553, 1, 1] +Buffers: + - Name: buf + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1677568 + - Name: expected + Format: UInt32 + Stride: 4 + Fill: 1 + Size: 1677568 +Results: + - Result: Test1 + Rule: BufferExact + Actual: buf + Expected: expected +DescriptorSets: + - Resources: + - Name: buf + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o \ No newline at end of file diff --git a/test/Feature/Attributes/IfBranchAttr.test b/test/Feature/Attributes/IfBranchAttr.test deleted file mode 100644 index 3eca8f8b..00000000 --- a/test/Feature/Attributes/IfBranchAttr.test +++ /dev/null @@ -1,57 +0,0 @@ -#--- source.hlsl -RWBuffer In : register(u0); -RWBuffer Out : register(u1); - -[numthreads(8,1,1)] -void main(uint3 TID : SV_GroupThreadID) { - [flatten] - if (TID.x) { - int X = In[TID.x]; - Out[TID.x] = -X; - } -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Int32 - Data: [ 1, 4, 9, 16, 25, 36, 49, 64] - - Name: Out - Format: Int32 - ZeroInitSize: 32 -DescriptorSets: - - Resources: - - Name: In - Kind: RWBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# UNSUPPORTED: Clang-Vulkan -# CHECK: Name: In -# CHECK: Format: Int32 -# CHECK: Data: [ 1, 4, 9, 16, 25, 36, 49, 64 ] -# CHECK: Name: Out -# CHECK: Format: Int32 -# CHECK: Data: [ 0, -4, -9, -16, -25, -36, -49, -64 ] diff --git a/test/Feature/Attributes/IfBranchFlatten.test b/test/Feature/Attributes/IfBranchFlatten.test deleted file mode 100644 index 5ee74f6d..00000000 --- a/test/Feature/Attributes/IfBranchFlatten.test +++ /dev/null @@ -1,57 +0,0 @@ -#--- source.hlsl -RWBuffer In : register(u0); -RWBuffer Out : register(u1); - -[numthreads(8,1,1)] -void main(uint3 TID : SV_GroupThreadID) { - [branch] - if (TID.x) { - int X = In[TID.x]; - Out[TID.x] = -X; - } -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Int32 - Data: [ 1, 4, 9, 16, 25, 36, 49, 64] - - Name: Out - Format: Int32 - ZeroInitSize: 32 -DescriptorSets: - - Resources: - - Name: In - Kind: RWBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# UNSUPPORTED: Clang-Vulkan -# CHECK: Name: In -# CHECK: Format: Int32 -# CHECK: Data: [ 1, 4, 9, 16, 25, 36, 49, 64 ] -# CHECK: Name: Out -# CHECK: Format: Int32 -# CHECK: Data: [ 0, -4, -9, -16, -25, -36, -49, -64 ] diff --git a/test/Feature/Attributes/SwitchBranchAttr.test b/test/Feature/Attributes/SwitchBranchAttr.test deleted file mode 100644 index 21d7b926..00000000 --- a/test/Feature/Attributes/SwitchBranchAttr.test +++ /dev/null @@ -1,65 +0,0 @@ -#--- source.hlsl -RWBuffer In : register(u0); -RWBuffer Out : register(u1); - -[numthreads(8,1,1)] -void main(uint3 TID : SV_GroupThreadID) { - [branch] - int X = In[TID.x]; - switch (TID.x % 3) { - case 0: - Out[TID.x] = -X; - break; - case 1: - Out[TID.x] = X * X; - break; - case 2: - Out[TID.x] = X * X * X; - break; - } -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Int32 - Data: [ 1, 2, 3, 4, 5, 6, 7, 8 ] - - Name: Out - Format: Int32 - Data: [ 9, 10, 11, 12, 13, 14, 15, 16 ] -DescriptorSets: - - Resources: - - Name: In - Kind: RWBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# UNSUPPORTED: Clang -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: Name: In -# CHECK: Format: Int32 -# CHECK: Data: [ 1, 2, 3, 4, 5, 6, 7, 8 ] -# CHECK: Name: Out -# CHECK: Format: Int32 -# CHECK: Data: [ -1, 4, 27, -4, 25, 216, -7, 64 ] diff --git a/test/Feature/Attributes/SwitchFlattenAttr.test b/test/Feature/Attributes/SwitchFlattenAttr.test deleted file mode 100644 index ee31f81d..00000000 --- a/test/Feature/Attributes/SwitchFlattenAttr.test +++ /dev/null @@ -1,65 +0,0 @@ -#--- source.hlsl -RWBuffer In : register(u0); -RWBuffer Out : register(u1); - -[numthreads(8,1,1)] -void main(uint3 TID : SV_GroupThreadID) { - [flatten] - int X = In[TID.x]; - switch (TID.x % 3) { - case 0: - Out[TID.x] = -X; - break; - case 1: - Out[TID.x] = X * X; - break; - case 2: - Out[TID.x] = X * X * X; - break; - } -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Int32 - Data: [ 1, 2, 3, 4, 5, 6, 7, 8 ] - - Name: Out - Format: Int32 - Data: [ 9, 10, 11, 12, 13, 14, 15, 16 ] -DescriptorSets: - - Resources: - - Name: In - Kind: RWBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# UNSUPPORTED: Clang -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: Name: In -# CHECK: Format: Int32 -# CHECK: Data: [ 1, 2, 3, 4, 5, 6, 7, 8 ] -# CHECK: Name: Out -# CHECK: Format: Int32 -# CHECK: Data: [ -1, 4, 27, -4, 25, 216, -7, 64 ] diff --git a/test/Feature/CBuffer/arrays-16bit.test b/test/Feature/CBuffer/arrays-16bit.test deleted file mode 100644 index 67dc11d6..00000000 --- a/test/Feature/CBuffer/arrays-16bit.test +++ /dev/null @@ -1,88 +0,0 @@ -#--- source.hlsl - -cbuffer CBArrays : register(b0) { - uint16_t c1[2][2]; - float16_t c2[1]; -} - -struct Arrays { - uint16_t c1[2][2]; - float16_t c2[1]; -}; - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0].c1 = c1; - Out[0].c2 = c2; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: CBArrays - Format: Hex16 - Data: [ - 0x0001, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, - 0xffff, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, - 0x1234, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, - 0x0002, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, - 0x3c00, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A, 0x5A5A - ] - - Name: Out - Format: Hex16 - # Warp doesn't seem to be able to handle a stride of 10 so we use 12 here - Stride: 12 - ZeroInitSize: 12 -DescriptorSets: - - Resources: - - Name: CBArrays - Kind: ConstantBuffer - DirectXBinding: - Register: 0 - Space: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 -... 
-#--- end - -# REQUIRES: Half, Int16 -# DXC's vulkan support does not layout cbuffers compatibly with DXIL -# UNSUPPORTED: Vulkan - -# https://github.com/llvm/llvm-project/issues/138996 -# XFAIL: Clang - -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: - Name: CBArrays -# CHECK: Format: Hex16 - -# CHECK: - Name: Out -# CHECK: Format: Hex16 -# CHECK: Data: [ - -# CHECK: 0x1, -# CHECK-NOT: 0x5A5A, -# CHECK: 0xFFFF, -# CHECK-NOT: 0x5A5A, -# CHECK: 0x1234, -# CHECK-NOT: 0x5A5A, -# CHECK: 0x2, -# CHECK-NOT: 0x5A5A, - -# CHECK: 0x3C00, -# CHECK-NOT: 0x5A5A - -# CHECK: ] diff --git a/test/Feature/CBuffer/arrays-64bit.test b/test/Feature/CBuffer/arrays-64bit.test deleted file mode 100644 index 1971a3df..00000000 --- a/test/Feature/CBuffer/arrays-64bit.test +++ /dev/null @@ -1,99 +0,0 @@ -#--- source.hlsl - -cbuffer CBArrays : register(b0) { - double2 c1[2]; - uint64_t3 c2[2]; - int64_t c3[2]; -} - -struct Arrays { - double2 c1[2]; - uint64_t3 c2[2]; - int64_t c3[2]; -}; - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0].c1 = c1; - Out[0].c2 = c2; - Out[0].c3 = c3; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: CBArrays - Format: Hex64 - Data: [ - 0x3ff0000000000000, 0x4008000000000000, - 0x5ff0000000000000, 0x6008000000000000, - 0x1234123412341234, 0x4321432143214321, - 0x0000000000000001, 0x5A5A5A5A5A5A5A5A, - 0x0000000000000001, 0x1234123412341234, - 0x4321432143214321, 0x5A5A5A5A5A5A5A5A, - 0x0000000000000001, 0x5A5A5A5A5A5A5A5A, - 0x0000000000000002, 0x5A5A5A5A5A5A5A5A - ] - - Name: Out - Format: Hex64 - Stride: 96 - ZeroInitSize: 96 -DescriptorSets: - - Resources: - - Name: CBArrays - Kind: ConstantBuffer - DirectXBinding: - Register: 0 - Space: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 -... 
-#--- end - -# DXC's vulkan support does not layout cbuffers compatibly with DXIL -# UNSUPPORTED: Vulkan - -# https://github.com/llvm/llvm-project/issues/110722 -# XFAIL: Clang - -# REQUIRES: Double, Int64 -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: - Name: CBArrays -# CHECK: Format: Hex64 - -# CHECK: - Name: Out -# CHECK: Format: Hex64 -# CHECK: Data: [ - -# CHECK: 0x3FF0000000000000, -# CHECK: 0x4008000000000000, -# CHECK: 0x5FF0000000000000, -# CHECK: 0x6008000000000000, - -# CHECK: 0x1234123412341234, -# CHECK: 0x4321432143214321, -# CHECK: 0x1, -# CHECK-NOT: 0x5A5A5A5A5A5A5A5A, -# CHECK: 0x1, -# CHECK: 0x1234123412341234, -# CHECK: 0x4321432143214321, -# CHECK-NOT: 0x5A5A5A5A5A5A5A5A, - -# CHECK: 0x1, -# CHECK: 0x2 -# CHECK-NOT: 0x5A5A5A5A5A5A5A5A - -# CHECK: ] diff --git a/test/Feature/CBuffer/arrays.test b/test/Feature/CBuffer/arrays.test deleted file mode 100644 index 710b36d8..00000000 --- a/test/Feature/CBuffer/arrays.test +++ /dev/null @@ -1,107 +0,0 @@ -#--- source.hlsl - -cbuffer CBArrays : register(b0) { - float c1[2]; - int4 c2[2][2]; - bool c3[2]; -} - -struct Arrays { - float c1[2]; - int4 c2[2][2]; - bool c3[2]; -}; - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0].c1 = c1; - Out[0].c2 = c2; - Out[0].c3 = c3; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: CBArrays - Format: Hex32 - Data: [ - 0x3f800000, 0x5A5A5A5A, 0x5A5A5A5A, 0x5A5A5A5A, - 0x40800000, 0x5A5A5A5A, 0x5A5A5A5A, 0x5A5A5A5A, - 0x00000001, 0x00000002, 0x00000003, 0x00000004, - 0x00000005, 0x00000006, 0x00000007, 0x00000008, - 0x00000009, 0x0000000A, 0x0000000B, 0x0000000C, - 0x0000000D, 0x0000000E, 0x0000000F, 0x00000010, - 0x00000000, 0x5A5A5A5A, 0x5A5A5A5A, 0x5A5A5A5A, - 0x00000001, 0x5A5A5A5A, 0x5A5A5A5A, 0x5A5A5A5A, - ] - - Name: Out - Format: Hex32 - Stride: 80 - 
ZeroInitSize: 80 -DescriptorSets: - - Resources: - - Name: CBArrays - Kind: ConstantBuffer - DirectXBinding: - Register: 0 - Space: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 -... -#--- end - -# DXC's vulkan support does not layout cbuffers compatibly with DXIL -# UNSUPPORTED: Vulkan - -# https://github.com/llvm/llvm-project/issues/110722 -# XFAIL: Clang - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: - Name: CBArrays -# CHECK: Format: Hex32 - -# CHECK: - Name: Out -# CHECK: Format: Hex32 -# CHECK: Data: [ - -# CHECK: 0x3F800000, -# CHECK: 0x40800000, -# CHECK-NOT: 0x5A5A5A5A, - -# CHECK: 0x1, -# CHECK: 0x2, -# CHECK: 0x3, -# CHECK: 0x4, -# CHECK: 0x5, -# CHECK: 0x6, -# CHECK: 0x7, -# CHECK: 0x8, -# CHECK: 0x9, -# CHECK: 0xA, -# CHECK: 0xB, -# CHECK: 0xC, -# CHECK: 0xD, -# CHECK: 0xE, -# CHECK: 0xF, -# CHECK: 0x10, - -# CHECK: 0x0, -# CHECK-NOT: 0x5A5A5A5A, - -# CHECK: 0x1 -# CHECK-NOT: 0x5A5A5A5A - -# CHECK: ] diff --git a/test/Feature/CBuffer/lit.local.cfg b/test/Feature/CBuffer/lit.local.cfg deleted file mode 100644 index 1d8227d6..00000000 --- a/test/Feature/CBuffer/lit.local.cfg +++ /dev/null @@ -1,7 +0,0 @@ -if 'Clang-Vulkan' in config.available_features: - config.unsupported = True - -# CBuffer bindings seem to be broken under metal -# https://github.com/llvm/offload-test-suite/issues/55 -if 'Metal' in config.available_features: - config.unsupported = True diff --git a/test/Feature/CBuffer/scalars-16bit.test b/test/Feature/CBuffer/scalars-16bit.test deleted file mode 100644 index cb727988..00000000 --- a/test/Feature/CBuffer/scalars-16bit.test +++ /dev/null @@ -1,70 +0,0 @@ -#--- source.hlsl - -cbuffer CBScalars : register(b0) { - float16_t a1; - uint16_t a2; - int16_t a3; -} - -struct Scalars { - float16_t a1; - uint16_t a2; - int16_t a3; -}; - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void 
main() { - Out[0].a1 = a1; - Out[0].a2 = a2; - Out[0].a3 = a3; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: CBScalars - Format: Hex16 - Data: [ 0x3c00, 0x0001, 0xffff ] - - Name: Out - Format: Hex16 - # Warp doesn't seem to be able to handle a stride of 6 so we use 8 here... - Stride: 8 - ZeroInitSize: 8 -DescriptorSets: - - Resources: - - Name: CBScalars - Kind: ConstantBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - -# REQUIRES: Half, Int16 - -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: - Name: CBScalars -# CHECK: Format: Hex16 - -# CHECK: - Name: Out -# CHECK: Format: Hex16 -# CHECK: Data: [ 0x3C00, 0x1, 0xFFFF, 0x0 ] diff --git a/test/Feature/CBuffer/scalars-64bit.test b/test/Feature/CBuffer/scalars-64bit.test deleted file mode 100644 index 736ed2af..00000000 --- a/test/Feature/CBuffer/scalars-64bit.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -cbuffer CBScalars : register(b0) { - double a1; - uint64_t a2; - int64_t a3; -} - -struct Scalars { - double a1; - uint64_t a2; - int64_t a3; -}; - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0].a1 = a1; - Out[0].a2 = a2; - Out[0].a3 = a3; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: CBScalars - Format: Hex64 - Data: [ 0xbff0000000000000, 0x0000000000000001, 0xffffffffffffffff ] - - Name: Out - Format: Hex64 - Stride: 24 - ZeroInitSize: 24 -DescriptorSets: - - Resources: - - Name: CBScalars - Kind: ConstantBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - 
DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - -# REQUIRES: Double, Int64 -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: Name: CBScalars -# CHECK: Format: Hex64 -# CHECK: Name: Out -# CHECK: Format: Hex64 -# CHECK: Data: [ 0xBFF0000000000000, 0x1, 0xFFFFFFFFFFFFFFFF ] diff --git a/test/Feature/CBuffer/scalars.test b/test/Feature/CBuffer/scalars.test deleted file mode 100644 index 45c4e104..00000000 --- a/test/Feature/CBuffer/scalars.test +++ /dev/null @@ -1,68 +0,0 @@ -#--- source.hlsl - -cbuffer CBScalars : register(b0) { - float a1; - int a2; - bool a3; -} - -struct Scalars { - float a1; - int a2; - bool a3; -}; - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0].a1 = a1; - Out[0].a2 = a2; - Out[0].a3 = a3; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: CBScalars - Format: Hex32 - Data: [ 0xbf800000, 0x0000002a, 0x00000001 ] - - Name: Out - Format: Hex32 - Stride: 12 - ZeroInitSize: 12 -DescriptorSets: - - Resources: - - Name: CBScalars - Kind: ConstantBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: - Name: CBScalars -# CHECK: Format: Hex32 - -# CHECK: - Name: Out -# CHECK: Format: Hex32 -# CHECK: Data: [ 0xBF800000, 0x2A, 0x1 ] diff --git a/test/Feature/CBuffer/structs.test b/test/Feature/CBuffer/structs.test deleted file mode 100644 index 4b48956e..00000000 --- a/test/Feature/CBuffer/structs.test +++ /dev/null @@ -1,138 +0,0 @@ -#--- source.hlsl - -struct X { - int a1; -}; - -struct Y : X { - int2 a2; -}; - -struct Z { - X xs[2]; - Y y; -}; - -cbuffer CBStructs : register(b0) { - X x1; - X x2; - Y y; - Z zs[2]; -}; - -struct S { - int x1a1; - int x2a1; - int ya1; - int2 ya2; - int z1x1a1; - int z1x2a1; - int z1ya1; - int2 z1ya2; - int z2x1a1; - int z2x2a1; - int z2ya1; - int2 z2ya2; -}; - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0].x1a1 = x1.a1; - Out[0].x2a1 = x2.a1; - Out[0].ya1 = y.a1; - Out[0].ya2 = y.a2; - Out[0].z1x1a1 = zs[0].xs[0].a1; - Out[0].z1x2a1 = zs[0].xs[1].a1; - Out[0].z1ya1 = zs[0].y.a1; - Out[0].z1ya2 = zs[0].y.a2; - Out[0].z2x1a1 = zs[1].xs[0].a1; - Out[0].z2x2a1 = zs[1].xs[1].a1; - Out[0].z2ya1 = zs[1].y.a1; - Out[0].z2ya2 = zs[1].y.a2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: CBVectors - Format: Hex32 - Data: [ - 0x1, 0x5A5A5A5A, 0x5A5A5A5A, 0x5A5A5A5A, - 0x2, 0x5A5A5A5A, 0x5A5A5A5A, 0x5A5A5A5A, - 0x3, 0x4, 0x5, 0x5A5A5A5A, - 0x6, 0x5A5A5A5A, 0x5A5A5A5A, 0x5A5A5A5A, - 0x7, 0x5A5A5A5A, 0x5A5A5A5A, 0x5A5A5A5A, - 0x8, 0x9, 0xA, 0x5A5A5A5A, - 0xB, 0x5A5A5A5A, 0x5A5A5A5A, 0x5A5A5A5A, - 0xC, 0x5A5A5A5A, 0x5A5A5A5A, 0x5A5A5A5A, - 0xD, 0xE, 0xF, 0x5A5A5A5A, - ] - - Name: Out - Format: Hex32 - Stride: 60 - ZeroInitSize: 60 -DescriptorSets: - - Resources: - - Name: CBVectors - Kind: ConstantBuffer - DirectXBinding: - Register: 0 - Space: 0 - - Name: Out - Kind: 
RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 -... -#--- end - -# DXC's vulkan support does not layout cbuffers compatibly with DXIL -# UNSUPPORTED: Vulkan - -+# Clang trips on 2-element vectors in structs: -+# https://github.com/llvm/llvm-project/issues/123968 -+# XFAIL: Clang - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: - Name: CBVectors -# CHECK: Format: Hex32 - -# CHECK: - Name: Out -# CHECK: Format: Hex32 -# CHECK: Data: [ -# CHECK: 0x1, -# CHECK-NOT: 0x5A5A5A5A, -# CHECK: 0x2, -# CHECK-NOT: 0x5A5A5A5A, -# CHECK: 0x3, -# CHECK: 0x4, -# CHECK: 0x5, -# CHECK-NOT: 0x5A5A5A5A, -# CHECK: 0x6, -# CHECK-NOT: 0x5A5A5A5A, -# CHECK: 0x7, -# CHECK-NOT: 0x5A5A5A5A, -# CHECK: 0x8, -# CHECK: 0x9, -# CHECK: 0xA, -# CHECK-NOT: 0x5A5A5A5A, -# CHECK: 0xB, -# CHECK-NOT: 0x5A5A5A5A, -# CHECK: 0xC, -# CHECK-NOT: 0x5A5A5A5A, -# CHECK: 0xD, -# CHECK: 0xE, -# CHECK: 0xF -# CHECK-NOT: 0x5A5A5A5A -# CHECK: ] diff --git a/test/Feature/CBuffer/vectors-16bit.test b/test/Feature/CBuffer/vectors-16bit.test deleted file mode 100644 index 91571851..00000000 --- a/test/Feature/CBuffer/vectors-16bit.test +++ /dev/null @@ -1,80 +0,0 @@ -#--- source.hlsl - -cbuffer CBVectors : register(b0) { - float16_t2 b1; - uint16_t3 b2; -} - -struct Vectors { - float16_t2 b1; - uint16_t3 b2; -}; - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0].b1 = b1; - Out[0].b2 = b2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: CBVectors - Format: Hex16 - Data: [ - 0x3c00, 0x3555, - 0x0001, 0xffff, 0x1234 - ] - - Name: Out - Format: Hex16 - # Warp doesn't seem to be able to handle a stride of 10 so we use 12 here - Stride: 12 - ZeroInitSize: 12 -DescriptorSets: - - Resources: - - Name: CBVectors - Kind: ConstantBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 
0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - -# REQUIRES: Half, Int16 - -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: - Name: CBVectors -# CHECK: Format: Hex16 - -# CHECK: - Name: Out -# CHECK: Format: Hex16 -# CHECK: Data: [ - -# CHECK: 0x3C00, -# CHECK: 0x3555, - -# CHECK: 0x1, -# CHECK: 0xFFFF, -# CHECK: 0x1234 - -# CHECK: ] diff --git a/test/Feature/CBuffer/vectors-64bit.test b/test/Feature/CBuffer/vectors-64bit.test deleted file mode 100644 index 701dfcfe..00000000 --- a/test/Feature/CBuffer/vectors-64bit.test +++ /dev/null @@ -1,96 +0,0 @@ -#--- source.hlsl - -cbuffer CBVectors { - double3 b1; - uint64_t3 b2; - int64_t3 b3; -} - -struct Vectors { - double3 b1; - uint64_t3 b2; - int64_t3 b3; -}; - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0].b1 = b1; - Out[0].b2 = b2; - Out[0].b3 = b3; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: CBVectors - Format: Hex64 - Data: [ - 0x3ff0000000000000, 0x4000000000000000, - 0x4008000000000000, 0x5A5A5A5A5A5A5A5A, - 0x0000000000000001, 0x0000000000001111, - 0x0000000000002222, 0x5A5A5A5A5A5A5A5A, - 0x000000000000000A, 0x000000000000000B, - 0x000000000000000C, 0x5A5A5A5A5A5A5A5A, - ] - - Name: Out - Format: Hex64 - Stride: 72 - ZeroInitSize: 72 -DescriptorSets: - - Resources: - - Name: CBVectors - Kind: ConstantBuffer - DirectXBinding: - Register: 0 - Space: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 -... 
-#--- end - -# REQUIRES: Double, Int64 -# DXC's vulkan support does not layout cbuffers compatibly with DXIL -# UNSUPPORTED: Vulkan - -# Clang trips on 3-element vectors in structs: -# https://github.com/llvm/llvm-project/issues/123968 -# XFAIL: Clang - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: - Name: CBVectors -# CHECK: Format: Hex64 - -# CHECK: - Name: Out -# CHECK: Format: Hex64 -# CHECK: Data: [ - -# CHECK: 0x3FF0000000000000, -# CHECK: 0x4000000000000000, -# CHECK: 0x4008000000000000, -# CHECK-NOT: 0x5A5A5A5A5A5A5A5A, - -# CHECK: 0x1, -# CHECK: 0x1111, -# CHECK: 0x2222, -# CHECK-NOT: 0x5A5A5A5A5A5A5A5A, - -# CHECK: 0xA, -# CHECK: 0xB, -# CHECK: 0xC -# CHECK-NOT: 0x5A5A5A5A5A5A5A5A - -# CHECK: ] diff --git a/test/Feature/CBuffer/vectors.test b/test/Feature/CBuffer/vectors.test deleted file mode 100644 index fa4985b1..00000000 --- a/test/Feature/CBuffer/vectors.test +++ /dev/null @@ -1,100 +0,0 @@ -#--- source.hlsl - -cbuffer CBVectors : register(b0) { - float3 b1; - int4 b2; - uint2 b3; - bool4 b4; -} - -struct Vectors { - float3 b1; - int4 b2; - uint2 b3; - bool4 b4; -}; - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0].b1 = b1; - Out[0].b2 = b2; - Out[0].b3 = b3; - Out[0].b4 = b4; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: CBVectors - Format: Hex32 - Data: [ - 0x3f800000, 0x40000000, 0x40400000, 0x5A5A5A5A, - 0x00000001, 0x00000002, 0x00000003, 0x00000004, - 0x00000001, 0x00000002, 0x5A5A5A5A, 0x5A5A5A5A, - 0x00000001, 0x00000000, 0x00000001, 0x00000000 - ] - - Name: Out - Format: Hex32 - Stride: 52 - ZeroInitSize: 52 -DescriptorSets: - - Resources: - - Name: CBVectors - Kind: ConstantBuffer - DirectXBinding: - Register: 0 - Space: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 -... 
-#--- end - -# DXC's vulkan support does not layout cbuffers compatibly with DXIL -# UNSUPPORTED: Vulkan - -# Clang trips on 3-element vectors in structs: -# https://github.com/llvm/llvm-project/issues/123968 -# XFAIL: Clang - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: - Name: CBVectors -# CHECK: Format: Hex32 - -# CHECK: - Name: Out -# CHECK: Format: Hex32 -# CHECK: Data: [ - -# CHECK: 0x3F800000, -# CHECK: 0x40000000, -# CHECK: 0x40400000, -# CHECK-NOT: 0x5A5A5A5A, - -# CHECK: 0x1, -# CHECK: 0x2, -# CHECK: 0x3, -# CHECK: 0x4, - -# CHECK: 0x1, -# CHECK: 0x2, -# CHECK-NOT: 0x5A5A5A5A, - -# CHECK: 0x1, -# CHECK: 0x0, -# CHECK: 0x1, -# CHECK: 0x0 - -# CHECK: ] diff --git a/test/Feature/HLSLLib/D3DCOLORtoUBYTE4.test b/test/Feature/HLSLLib/D3DCOLORtoUBYTE4.test deleted file mode 100644 index ae47287c..00000000 --- a/test/Feature/HLSLLib/D3DCOLORtoUBYTE4.test +++ /dev/null @@ -1,59 +0,0 @@ -#--- source.hlsl -StructuredBuffer In0 : register(t0); -RWStructuredBuffer Out0 : register(u1); - -[numthreads(1,1,1)] -void main() { - Out0[0] = D3DCOLORtoUBYTE4(In0[0]); - Out0[1] = D3DCOLORtoUBYTE4(float4(0, 11.11, -50.5, 100)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In0 - Format: Float32 - Stride: 16 - Data: [0, 11.11, -50.5, 100] - - Name: Out0 - Format: UInt32 - Stride: 16 - ZeroInitSize: 32 - - Name: ExpectedOut0 - Format: UInt32 - Stride: 16 - Data: [ 4294954419, 2833, 0, 25500, 4294954419, 2833, 0, 25500 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 -DescriptorSets: - - Resources: - - Name: In0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -#--- end - - -# 
https://github.com/llvm/llvm-project/issues/149561 -# XFAIL: Clang-Vulkan && !VK_KHR_shader_float_controls2 - -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/abs.32.test b/test/Feature/HLSLLib/abs.32.test deleted file mode 100644 index 5278af73..00000000 --- a/test/Feature/HLSLLib/abs.32.test +++ /dev/null @@ -1,142 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -StructuredBuffer In2 : register(t1); -StructuredBuffer In3 : register(t2); -RWStructuredBuffer Out1 : register(u3); -RWStructuredBuffer Out2 : register(u4); -RWStructuredBuffer Out3 : register(u5); - -[numthreads(1,1,1)] -void main() { - // int - Out1[0] = abs(In1[0]); - int4 Tmp = {abs(In1[0].xyz), abs(In1[0].w)}; - Out1[1] = Tmp; - Out1[2].xy = abs(In1[0].xy); - - // uint - Out2[0] = abs(In2[0]); - uint4 Tmp2 = {abs(In2[0].xyz), abs(In2[0].w)}; - Out2[1] = Tmp2; - Out2[2].xy = abs(In2[0].xy); - - // float - Out3[0] = abs(In3[0]); - float4 Tmp3 = {abs(In3[1].xyz), abs(In3[1].w)}; - Out3[1] = Tmp3; - float4 Tmp4 = {abs(In3[2].xy), abs(In3[2].zw)}; - Out3[2] = Tmp4; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Int32 - Stride: 16 - Data: [-1, 0, -2147483648, 2147483647] - - Name: In2 - Format: UInt32 - Stride: 16 - Data: [1, 0xffffffff, 0, 10] - - Name: In3 - Format: Float32 - Stride: 16 - Data: [nan, -nan, 0, -0, -1.3, inf, -inf, 0x1.e7d42cp-127, -0x1.e7d42cp-127, -0.5, -0.05, -19] - - Name: Out1 - Format: Int32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut1 # The result we expect - Format: Int32 - Stride: 16 - Data: [1, 0, -2147483648, 2147483647, 1, 0, -2147483648, 2147483647, 1, 0, 0, 0] # Last two are filler - - Name: Out2 - Format: UInt32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut2 # The result we expect - Format: UInt32 - Stride: 16 - Data: [1, 4294967295, 0, 
10, 1, 4294967295, 0, 10, 1, 4294967295, 0, 0] # Last two are filler - - Name: Out3 - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut3 # The result we expect - Format: Float32 - Stride: 16 - Data: [nan, nan, 0, 0, 1.3, inf, inf, 0x1.e7d42cp-127, 0x1.e7d42cp-127, 0.5, 0.05, 19] -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 - - Result: Test2 - Rule: BufferExact - Actual: Out2 - Expected: ExpectedOut2 - - Result: Test3 - Rule: BufferFloatULP - ULPT: 0 - Actual: Out3 - Expected: ExpectedOut3 -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: In3 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Out3 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 -... 
-#--- end - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7512 -# XFAIL: DXC-Vulkan - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/abs.fp16.test b/test/Feature/HLSLLib/abs.fp16.test deleted file mode 100644 index 86784e1c..00000000 --- a/test/Feature/HLSLLib/abs.fp16.test +++ /dev/null @@ -1,64 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -RWStructuredBuffer Out1 : register(u1); - -[numthreads(1,1,1)] -void main() { - Out1[0] = abs(In1[0]); - half4 Tmp = {abs(In1[1].xyz), abs(In1[1].w)}; - Out1[1] = Tmp; - half4 Tmp2 = {abs(In1[2].xy), abs(In1[2].zw)}; - Out1[2] = Tmp2; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Float16 - Stride: 8 - Data: [0x7e00, 0xfe00, 0x0000, 0x8000, 0xbd33, 0x7c00, 0xfc00, 0x0001, 0x8001, 0xb800, 0xaa66, 0xccc0] - # nan, -nan, 0, -0, -1.3, inf, -inf, denorm, -denorm, -0.5, -0.05, -19 - - Name: Out1 - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut1 # The result we expect - Format: Float16 - Stride: 8 - Data: [0x7e00, 0x7e00, 0x0000, 0x0000, 0x3d33, 0x7c00, 0x7c00, 0x0001, 0x0001, 0x3800, 0x2a66, 0x4cc0] -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 0 - Actual: Out1 - Expected: ExpectedOut1 -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/abs.fp64.test b/test/Feature/HLSLLib/abs.fp64.test deleted file mode 100644 index 96745611..00000000 --- a/test/Feature/HLSLLib/abs.fp64.test +++ /dev/null @@ -1,63 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -RWStructuredBuffer Out1 : register(u1); - -[numthreads(1,1,1)] -void main() { - Out1[0] = abs(In1[0]); - double4 Tmp = {abs(In1[1].xyz), abs(In1[1].w)}; - Out1[1] = Tmp; - double4 Tmp2 = {abs(In1[2].xy), abs(In1[2].zw)}; - Out1[2] = Tmp2; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Float64 - Stride: 32 - Data: [nan, -nan, 0, -0, -1.3, inf, -inf, 0x0.fffffffffffffp-1022, -0x0.fffffffffffffp-1022, -0.5, -0.05, -19] - - Name: Out1 - Format: Float64 - Stride: 32 - ZeroInitSize: 96 - - Name: ExpectedOut1 # The result we expect - Format: Float64 - Stride: 32 - Data: [nan, nan, 0, 0, 1.3, inf, inf, 0x0.fffffffffffffp-1022, 0x0.fffffffffffffp-1022, 0.5, 0.05, 19] -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 0 - Actual: Out1 - Expected: ExpectedOut1 -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# REQUIRES: Double -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/abs.int16.test b/test/Feature/HLSLLib/abs.int16.test deleted file mode 100644 index 4a080d8a..00000000 --- a/test/Feature/HLSLLib/abs.int16.test +++ /dev/null @@ -1,103 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -StructuredBuffer In2 : register(t1); -RWStructuredBuffer Out1 : register(u2); -RWStructuredBuffer Out2 : register(u3); - -[numthreads(1,1,1)] -void main() { - // int16_t - Out1[0] = abs(In1[0]); - int16_t4 Tmp = {abs(In1[0].xyz), abs(In1[0].w)}; - Out1[1] = Tmp; - Out1[2].xy = abs(In1[0].xy); - - // uint16_t - Out2[0] = abs(In2[0]); - uint16_t4 Tmp2 = {abs(In2[0].xyz), abs(In2[0].w)}; - Out2[1] = Tmp2; - Out2[2].xy = abs(In2[0].xy); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Int16 - Stride: 8 - Data: [-1, 0, -32768, 32767] - - Name: In2 - Format: UInt16 - Stride: 8 - Data: [1, 65535, 0, 10] - - Name: Out1 - Format: Int16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut1 # The result we expect - Format: Int16 - Stride: 8 - Data: [1, 0, -32768, 32767, 1, 0, -32768, 32767, 1, 0, 0, 0] # Last two are filler - - Name: Out2 - Format: UInt16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut2 # The result we expect - Format: UInt16 - Stride: 8 - Data: [1, 65535, 0, 10, 1, 65535, 0, 10, 1, 65535, 0, 0] # Last two are filler -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 - - Result: Test2 - Rule: BufferExact - Actual: Out2 - Expected: ExpectedOut2 -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out1 - Kind: 
RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -... -#--- end - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7512 -# XFAIL: DXC-Vulkan - -# REQUIRES: Int16 -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/abs.int64.test b/test/Feature/HLSLLib/abs.int64.test deleted file mode 100644 index 19ff3855..00000000 --- a/test/Feature/HLSLLib/abs.int64.test +++ /dev/null @@ -1,103 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -StructuredBuffer In2 : register(t1); -RWStructuredBuffer Out1 : register(u2); -RWStructuredBuffer Out2 : register(u3); - -[numthreads(1,1,1)] -void main() { - // int64_t - Out1[0] = abs(In1[0]); - int64_t4 Tmp = {abs(In1[0].xyz), abs(In1[0].w)}; - Out1[1] = Tmp; - Out1[2].xy = abs(In1[0].xy); - - // uint64_t - Out2[0] = abs(In2[0]); - uint64_t4 Tmp2 = {abs(In2[0].xyz), abs(In2[0].w)}; - Out2[1] = Tmp2; - Out2[2].xy = abs(In2[0].xy); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Int64 - Stride: 32 - Data: [-1, 0, -9223372036854775808, 9223372036854775807] - - Name: In2 - Format: UInt64 - Stride: 32 - Data: [1, 0xFFFFFFFFFFFFFFFF, 0, 10] - - Name: Out1 - Format: Int64 - Stride: 32 - ZeroInitSize: 96 - - Name: ExpectedOut1 # The result we expect - Format: Int64 - Stride: 32 - Data: [1, 0, -9223372036854775808, 9223372036854775807, 1, 0, -9223372036854775808, 9223372036854775807, 1, 0, 0, 0] # Last two are filler - - Name: Out2 - Format: UInt64 - Stride: 32 - ZeroInitSize: 96 - - Name: ExpectedOut2 # The result we expect - Format: UInt64 - Stride: 32 - Data: [1, 0xFFFFFFFFFFFFFFFF, 0, 10, 1, 0xFFFFFFFFFFFFFFFF, 0, 10, 1, 0xFFFFFFFFFFFFFFFF, 0, 0] # Last 
two are filler -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 - - Result: Test2 - Rule: BufferExact - Actual: Out2 - Expected: ExpectedOut2 -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -... -#--- end - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7512 -# XFAIL: DXC-Vulkan - -# REQUIRES: Int64 -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/acos.16.test b/test/Feature/HLSLLib/acos.16.test deleted file mode 100644 index 15f5934f..00000000 --- a/test/Feature/HLSLLib/acos.16.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = acos(In[0]); - half4 Tmp = {acos(In[1].xyz), acos(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {acos(In[2].xy), acos(In[2].zw)}; - Out[2] = Tmp2; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [0x7e00, 0xfc00, 0x8001, 0x8000, 0x0, 0x0001, 0x7c00, 0x3c00, 0xbc00, 0x3e00, 0xbe00, 0x7e00] - # nan, -inf, -denorm, -0, 0, denorm, inf, 1, -1, 1.5, -1.5, nan (filler) - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [0x7e00, 0x7e00, 0x3e48, 0x3e48, 0x3e48, 0x3e48, 0x7e00, 0x0, 0x4248, 0x7e00, 0x7e00, 0x7e00] - # nan, nan, 
1.570796, 1.570796, 1.570796, 1.570796, nan, 0, 3.1415926, nan, nan, nan -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0.0010 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - -# REQUIRES: Half - -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/acos.32.test b/test/Feature/HLSLLib/acos.32.test deleted file mode 100644 index 4f468c89..00000000 --- a/test/Feature/HLSLLib/acos.32.test +++ /dev/null @@ -1,65 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = acos(In[0]); - float4 Tmp = {acos(In[1].xyz), acos(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {acos(In[2].xy), acos(In[2].zw)}; - Out[2] = Tmp2; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [nan, -inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, inf, 1, -1, 1.5, -1.5, nan] - # nan, -inf, -denorm, -0, 0, denorm, inf, 1, -1, 1.5, -1.5, nan - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [nan, nan, 0x1.921fb6p+0, 0x1.921fb6p+0, 0x1.921fb6p+0, 0x1.921fb6p+0, nan, 0, 3.1415926, nan, nan, nan] - #[nan, nan, 1.570796, 1.570796, 1.570796, 1.570796, nan, 0, 3.14, nan, nan, nan -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0.0008 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - 
VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/adduint64.test b/test/Feature/HLSLLib/adduint64.test deleted file mode 100644 index d0b4e908..00000000 --- a/test/Feature/HLSLLib/adduint64.test +++ /dev/null @@ -1,76 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - // vec4 - Out[0] = AddUint64(In[0], In[0]); - - // vec2 - uint4 Tmp = {AddUint64(In[1].xy, In[1].xy), AddUint64(In[1].zw, In[1].zw)}; - Out[1] = Tmp; - uint4 Tmp2 = {AddUint64(In[2].xy, In[2].xy), AddUint64(In[2].zw, In[2].zw)}; - Out[2] = Tmp2; - uint4 Tmp3 = {AddUint64(uint2(0xffffffff, 0xffffffff), uint2(1, 0)), AddUint64(uint2(1, 256), uint2(1, 256))}; - Out[3] = Tmp3; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: UInt32 - Stride: 16 - Data: [ 1, 0, 1, 256, 10000, 10001, 0x80000000, 1, 0x7fffffff, 0x7fffffff, 0xffffffff, 0x7fffffff ] - # 1, 0, 1, 256, 10000, 10001, 2147483648, 1, 2147483647, 2147483647, 4294967295, 2147483647 - - Name: Out - Format: UInt32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut # The result we expect - Format: UInt32 - Stride: 16 - Data: [ 2, 0, 2, 512, 20000, 20002, 0, 3, 0xfffffffe, 0xfffffffe, 0xfffffffe, 0xffffffff, 0, 0, 2, 512 ] - # 2, 0, 2, 512, 20000, 20002, 0, 3, 4294967294, 4294967294, 4294967294, 4294967295, 0, 0, 2, 512 -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 
- Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - -# https://github.com/llvm/offload-test-suite/issues/292 -# XFAIL: DXC-Metal - -https://github.com/llvm/offload-test-suite/issues/344 -# XFAIL: Clang-Metal - -# UNSUPPORTED: DXC-Vulkan - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/all.32.test b/test/Feature/HLSLLib/all.32.test deleted file mode 100644 index 6c75814b..00000000 --- a/test/Feature/HLSLLib/all.32.test +++ /dev/null @@ -1,142 +0,0 @@ -#--- source.hlsl -StructuredBuffer In0 : register(t0); -StructuredBuffer In1 : register(t1); -StructuredBuffer In2 : register(t2); - -RWStructuredBuffer Out0 : register(u3); -RWStructuredBuffer Out1 : register(u4); -RWStructuredBuffer Out2 : register(u5); - - -[numthreads(1,1,1)] -void main() { - // float - Out0[0] = all(In0[0]); - Out0[1] = all(In0[1].xyz); - Out0[2] = all(In0[1].w); - Out0[3] = all(In0[2].xy); - Out0[4] = all(In0[2].zw); - Out0[5] = all(float4(1.0, -2.5, 0, 4.2)); - - // int - Out1[0] = all(In1[0]); - Out1[1] = all(In1[1].xyz); - Out1[2] = all(In1[1].w); - Out1[3] = all(In1[2].xy); - Out1[4] = all(In1[2].zw); - Out1[5] = all(int4(1, -2, 0, 4)); - - // uint - Out2[0] = all(In2[0]); - Out2[1] = all(In2[1].xyz); - Out2[2] = all(In2[1].w); - Out2[3] = all(In2[2].xy); - Out2[4] = all(In2[2].zw); - Out2[5] = all(uint4(1, 2, 0, 4)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In0 - Format: Float32 - Stride: 16 - Data: [ 1.0, -2.5, 0, 4.5, 2.0, 3.0, 4.0, -5.5, 0, 0, -0.01, 0.01 ] - - Name: In1 - Format: Int32 - Stride: 16 - Data: [ 1, -2, 0, 4, 2, 3, 4, -5, 0, 0, -1, 1 ] - - Name: In2 - Format: UInt32 - Stride: 16 - Data: [ 1, 2, 0, 4, 2, 3, 4, 5, 0, 0, 100, 1 ] - - Name: Out0 - Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut0 - Format: Bool - Stride: 4 - Data: [ 0, 1, 1, 0, 1, 0 ] - - Name: Out1 - 
Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut1 - Format: Bool - Stride: 4 - Data: [ 0, 1, 1, 0, 1, 0 ] - - Name: Out2 - Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut2 - Format: Bool - Stride: 4 - Data: [ 0, 1, 1, 0, 1, 0 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 - - Result: Test2 - Rule: BufferExact - Actual: Out2 - Expected: ExpectedOut2 -DescriptorSets: - - Resources: - - Name: In0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 -#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/all.bool.test b/test/Feature/HLSLLib/all.bool.test deleted file mode 100644 index c3dcf2d5..00000000 --- a/test/Feature/HLSLLib/all.bool.test +++ /dev/null @@ -1,64 +0,0 @@ -#--- source.hlsl -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - - -[numthreads(1,1,1)] -void main() { - Out[0] = all(In[0]); - Out[1] = all(In[1].xyz); - Out[2] = all(In[1].w); - Out[3] = all(In[2].xy); - Out[4] = all(In[2].zw); - Out[5] = all(bool4(1, 1, 0, 1)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: 
Bool - Stride: 16 - Data: [ 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1 ] - - Name: Out - Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut - Format: Bool - Stride: 4 - Data: [ 0, 1, 1, 0, 1, 0 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -#--- end - -# https://github.com/llvm/llvm-project/issues/140824 -# XFAIL: Clang - -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/all.fp16.test b/test/Feature/HLSLLib/all.fp16.test deleted file mode 100644 index f7fc1ffc..00000000 --- a/test/Feature/HLSLLib/all.fp16.test +++ /dev/null @@ -1,63 +0,0 @@ -#--- source.hlsl -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - - -[numthreads(1,1,1)] -void main() { - Out[0] = all(In[0]); - Out[1] = all(In[1].xyz); - Out[2] = all(In[1].w); - Out[3] = all(In[2].xy); - Out[4] = all(In[2].zw); - Out[5] = all(half4(1.0, -2.5, 0, 4.2)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x3c00, 0xc100, 0x0000, 0xc480, 0x4000, 0x4200, 0x4400, 0xc580, 0x0000, 0x0000, 0xa11f, 0x211f ] - # 1.0, -2.5, 0, 4.5, 2.0, 3.0, 4.0, -5.5, 0, 0, -0.01, 0.01 - - Name: Out - Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut - Format: Bool - Stride: 4 - Data: [ 0, 1, 1, 0, 1, 0 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: 
RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/all.fp64.test b/test/Feature/HLSLLib/all.fp64.test deleted file mode 100644 index c2dd26c2..00000000 --- a/test/Feature/HLSLLib/all.fp64.test +++ /dev/null @@ -1,62 +0,0 @@ -#--- source.hlsl -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - - -[numthreads(1,1,1)] -void main() { - Out[0] = all(In[0]); - Out[1] = all(In[1].xyz); - Out[2] = all(In[1].w); - Out[3] = all(In[2].xy); - Out[4] = all(In[2].zw); - Out[5] = all(double4(1.0, -2.5, 0, 4.2)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float64 - Stride: 32 - Data: [ 1.0, -2.5, 0, 4.5, 2.0, 3.0, 4.0, -5.5, 0, 0, -0.01, 0.01 ] - - Name: Out - Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut - Format: Bool - Stride: 4 - Data: [ 0, 1, 1, 0, 1, 0 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -#--- end - -# REQUIRES: Double -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/all.int16.test b/test/Feature/HLSLLib/all.int16.test deleted file mode 100644 index b4bfd8cc..00000000 --- a/test/Feature/HLSLLib/all.int16.test +++ /dev/null @@ -1,103 +0,0 @@ -#--- source.hlsl -StructuredBuffer In0 : register(t0); -StructuredBuffer In1 : register(t1); - -RWStructuredBuffer Out0 : register(u2); 
-RWStructuredBuffer Out1 : register(u3); - - -[numthreads(1,1,1)] -void main() { - // int16_t - Out0[0] = all(In0[0]); - Out0[1] = all(In0[1].xyz); - Out0[2] = all(In0[1].w); - Out0[3] = all(In0[2].xy); - Out0[4] = all(In0[2].zw); - Out0[5] = all(int16_t4(1, -2, 0, 4)); - - // uint16_t - Out1[0] = all(In1[0]); - Out1[1] = all(In1[1].xyz); - Out1[2] = all(In1[1].w); - Out1[3] = all(In1[2].xy); - Out1[4] = all(In1[2].zw); - Out1[5] = all(uint16_t4(1, 2, 0, 4)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In0 - Format: Int16 - Stride: 8 - Data: [ 1, -2, 0, 4, 2, 3, 4, -5, 0, 0, -1, 1 ] - - Name: In1 - Format: UInt16 - Stride: 8 - Data: [ 1, 2, 0, 4, 2, 3, 4, 5, 0, 0, 100, 1 ] - - Name: Out0 - Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut0 - Format: Bool - Stride: 4 - Data: [ 0, 1, 1, 0, 1, 0 ] - - Name: Out1 - Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut1 - Format: Bool - Stride: 4 - Data: [ 0, 1, 1, 0, 1, 0 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 -DescriptorSets: - - Resources: - - Name: In0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -#--- end - -# REQUIRES: Int16 -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/all.int64.test b/test/Feature/HLSLLib/all.int64.test deleted file mode 100644 index 
3324a661..00000000 --- a/test/Feature/HLSLLib/all.int64.test +++ /dev/null @@ -1,103 +0,0 @@ -#--- source.hlsl -StructuredBuffer In0 : register(t0); -StructuredBuffer In1 : register(t1); - -RWStructuredBuffer Out0 : register(u2); -RWStructuredBuffer Out1 : register(u3); - - -[numthreads(1,1,1)] -void main() { - // int64_t - Out0[0] = all(In0[0]); - Out0[1] = all(In0[1].xyz); - Out0[2] = all(In0[1].w); - Out0[3] = all(In0[2].xy); - Out0[4] = all(In0[2].zw); - Out0[5] = all(int64_t4(1, -2, 0, 4)); - - // uint64_t - Out1[0] = all(In1[0]); - Out1[1] = all(In1[1].xyz); - Out1[2] = all(In1[1].w); - Out1[3] = all(In1[2].xy); - Out1[4] = all(In1[2].zw); - Out1[5] = all(uint64_t4(1, 2, 0, 4)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In0 - Format: Int64 - Stride: 32 - Data: [ 1, -2, 0, 4, 2, 3, 4, -5, 0, 0, -1, 1 ] - - Name: In1 - Format: UInt64 - Stride: 32 - Data: [ 1, 2, 0, 4, 2, 3, 4, 5, 0, 0, 100, 1 ] - - Name: Out0 - Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut0 - Format: Bool - Stride: 4 - Data: [ 0, 1, 1, 0, 1, 0 ] - - Name: Out1 - Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut1 - Format: Bool - Stride: 4 - Data: [ 0, 1, 1, 0, 1, 0 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 -DescriptorSets: - - Resources: - - Name: In0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -#--- end - -# REQUIRES: Int64 -# RUN: split-file %s %t -# RUN: %dxc_target 
-HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/and.test b/test/Feature/HLSLLib/and.test deleted file mode 100644 index 702cfd09..00000000 --- a/test/Feature/HLSLLib/and.test +++ /dev/null @@ -1,76 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -StructuredBuffer In2 : register(t1); -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = and(In1[0], In2[0]); - bool4 Tmp = {and(In1[0].xyz, In2[0].xyz), and(In1[0].w, In2[0].w)}; - Out[1] = Tmp; - Out[2].xy = and(In1[0].xy, In2[0].xy); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Bool - Stride: 16 - Data: [1, 0, 1, 0] - - Name: In2 - Format: Bool - Stride: 16 - Data: [1, 0, 0, 1] - - Name: Out - Format: Bool - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Bool - Stride: 16 - Data: [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0] # Last two are filler -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# https://github.com/llvm/llvm-project/issues/140824 -# XFAIL: Clang -# https://github.com/microsoft/DirectXShaderCompiler/issues/7475 -# XFAIL: DXC-Vulkan -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/any.32.test b/test/Feature/HLSLLib/any.32.test deleted file mode 100644 index 0fde5548..00000000 --- a/test/Feature/HLSLLib/any.32.test +++ /dev/null @@ -1,142 +0,0 @@ -#--- source.hlsl -StructuredBuffer In0 : register(t0); -StructuredBuffer In1 : register(t1); -StructuredBuffer In2 : register(t2); - -RWStructuredBuffer Out0 : register(u3); -RWStructuredBuffer Out1 : register(u4); -RWStructuredBuffer Out2 : register(u5); - - -[numthreads(1,1,1)] -void main() { - // float - Out0[0] = any(In0[0]); - Out0[1] = any(In0[1].xyz); - Out0[2] = any(In0[1].w); - Out0[3] = any(In0[2].xy); - Out0[4] = any(In0[2].zw); - Out0[5] = any(float4(0, 0, 0, 4.5)); - - // int - Out1[0] = any(In1[0]); - Out1[1] = any(In1[1].xyz); - Out1[2] = any(In1[1].w); - Out1[3] = any(In1[2].xy); - Out1[4] = any(In1[2].zw); - Out1[5] = any(int4(0, 0, 0, 4)); - - // uint - Out2[0] = any(In2[0]); - Out2[1] = any(In2[1].xyz); - Out2[2] = any(In2[1].w); - Out2[3] = any(In2[2].xy); - Out2[4] = any(In2[2].zw); - Out2[5] = any(uint4(0, 0, 0, 4)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In0 - Format: Float32 - Stride: 16 - Data: [ 0, 0, 0, 4.5, 0, 0, 0, -5.5, 0, 1.0, -0.01, 0.01] - - Name: In1 - Format: Int32 - Stride: 16 - Data: [ 0, 0, 0, 4, 0, 0, 0, -5, 0, 2, -1, 1 ] - - Name: In2 - Format: UInt32 - Stride: 16 - Data: [ 0, 0, 0, 4, 0, 0, 0, 5, 0, 2, 100, 1 ] - - Name: Out0 - Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut0 - Format: Bool - Stride: 4 - Data: [ 1, 0, 1, 1, 1, 1 ] - - Name: Out1 - Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut1 - Format: Bool - Stride: 4 - 
Data: [ 1, 0, 1, 1, 1, 1 ] - - Name: Out2 - Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut2 - Format: Bool - Stride: 4 - Data: [ 1, 0, 1, 1, 1, 1 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 - - Result: Test2 - Rule: BufferExact - Actual: Out2 - Expected: ExpectedOut2 -DescriptorSets: - - Resources: - - Name: In0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 -#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/any.bool.test b/test/Feature/HLSLLib/any.bool.test deleted file mode 100644 index 78aa9b17..00000000 --- a/test/Feature/HLSLLib/any.bool.test +++ /dev/null @@ -1,64 +0,0 @@ -#--- source.hlsl -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - - -[numthreads(1,1,1)] -void main() { - Out[0] = any(In[0]); - Out[1] = any(In[1].xyz); - Out[2] = any(In[1].w); - Out[3] = any(In[2].xy); - Out[4] = any(In[2].zw); - Out[5] = any(bool4(0, 0, 0, 1)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Bool - Stride: 16 - Data: [ 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1 ] - - Name: Out - Format: Bool - 
Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut - Format: Bool - Stride: 4 - Data: [ 1, 0, 1, 1, 1, 1 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -#--- end - -# Bug https://github.com/llvm/llvm-project/issues/140824 -# XFAIL: Clang - -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/any.fp16.test b/test/Feature/HLSLLib/any.fp16.test deleted file mode 100644 index c58e5e70..00000000 --- a/test/Feature/HLSLLib/any.fp16.test +++ /dev/null @@ -1,63 +0,0 @@ -#--- source.hlsl -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - - -[numthreads(1,1,1)] -void main() { - Out[0] = any(In[0]); - Out[1] = any(In[1].xyz); - Out[2] = any(In[1].w); - Out[3] = any(In[2].xy); - Out[4] = any(In[2].zw); - Out[5] = any(half4(0, 0, 0, 4.5)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x0000, 0x0000, 0x0000, 0x4480, 0x0000, 0x0000, 0x0000, 0xc580, 0x0000, 0x3c00, 0xa11f, 0x211f ] - # 0, 0, 0, 4.5, 0, 0, 0, -5.5, 0, 1.0, -0.01, 0.01 - - Name: Out - Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut - Format: Bool - Stride: 4 - Data: [ 1, 0, 1, 1, 1, 1 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -#--- end - -# REQUIRES: Half 
-# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/any.fp64.test b/test/Feature/HLSLLib/any.fp64.test deleted file mode 100644 index 1b94d661..00000000 --- a/test/Feature/HLSLLib/any.fp64.test +++ /dev/null @@ -1,65 +0,0 @@ -#--- source.hlsl -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - - -[numthreads(1,1,1)] -void main() { - Out[0] = any(In[0]); - Out[1] = any(In[1].xyz); - Out[2] = any(In[1].w); - Out[3] = any(In[2].xy); - Out[4] = any(In[2].zw); - Out[5] = any(double4(0, 0, 0, 4.5)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float64 - Stride: 32 - Data: [ 0, 0, 0, 4.5, 0, 0, 0, -5.5, 0, 1.0, -0.01, 0.01 ] - - Name: Out - Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut - Format: Bool - Stride: 4 - Data: [ 1, 0, 1, 1, 1, 1 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -#--- end - -# Bug https://github.com/llvm/offload-test-suite/issues/370 -# XFAIL: DXC && DirectX-Intel - -# REQUIRES: Double -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/any.int16.test b/test/Feature/HLSLLib/any.int16.test deleted file mode 100644 index 7fab6c60..00000000 --- a/test/Feature/HLSLLib/any.int16.test +++ /dev/null @@ -1,103 +0,0 @@ -#--- source.hlsl -StructuredBuffer In0 : register(t0); -StructuredBuffer In1 : register(t1); - -RWStructuredBuffer Out0 : register(u2); -RWStructuredBuffer Out1 : register(u3); 
- - -[numthreads(1,1,1)] -void main() { - // int16_t - Out0[0] = any(In0[0]); - Out0[1] = any(In0[1].xyz); - Out0[2] = any(In0[1].w); - Out0[3] = any(In0[2].xy); - Out0[4] = any(In0[2].zw); - Out0[5] = any(int16_t4(0, 0, 0, 4)); - - // uint16_t - Out1[0] = any(In1[0]); - Out1[1] = any(In1[1].xyz); - Out1[2] = any(In1[1].w); - Out1[3] = any(In1[2].xy); - Out1[4] = any(In1[2].zw); - Out1[5] = any(uint16_t4(0, 0, 0, 4)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In0 - Format: Int16 - Stride: 8 - Data: [ 0, 0, 0, 4, 0, 0, 0, -5, 0, 2, -1, 1 ] - - Name: In1 - Format: UInt16 - Stride: 8 - Data: [ 0, 0, 0, 4, 0, 0, 0, 5, 0, 2, 100, 1 ] - - Name: Out0 - Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut0 - Format: Bool - Stride: 4 - Data: [ 1, 0, 1, 1, 1, 1 ] - - Name: Out1 - Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut1 - Format: Bool - Stride: 4 - Data: [ 1, 0, 1, 1, 1, 1 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 -DescriptorSets: - - Resources: - - Name: In0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -#--- end - -# REQUIRES: Int16 -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/any.int64.test b/test/Feature/HLSLLib/any.int64.test deleted file mode 100644 index 3b030723..00000000 --- 
a/test/Feature/HLSLLib/any.int64.test +++ /dev/null @@ -1,106 +0,0 @@ -#--- source.hlsl -StructuredBuffer In0 : register(t0); -StructuredBuffer In1 : register(t1); - -RWStructuredBuffer Out0 : register(u2); -RWStructuredBuffer Out1 : register(u3); - - -[numthreads(1,1,1)] -void main() { - // int64_t - Out0[0] = any(In0[0]); - Out0[1] = any(In0[1].xyz); - Out0[2] = any(In0[1].w); - Out0[3] = any(In0[2].xy); - Out0[4] = any(In0[2].zw); - Out0[5] = any(int64_t4(0, 0, 0, 4)); - - // uint64_t - Out1[0] = any(In1[0]); - Out1[1] = any(In1[1].xyz); - Out1[2] = any(In1[1].w); - Out1[3] = any(In1[2].xy); - Out1[4] = any(In1[2].zw); - Out1[5] = any(uint64_t4(0, 0, 0, 4)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In0 - Format: Int64 - Stride: 32 - Data: [ 0, 0, 0, 4, 0, 0, 0, -5, 0, 2, -1, 1 ] - - Name: In1 - Format: UInt64 - Stride: 32 - Data: [ 0, 0, 0, 4, 0, 0, 0, 5, 0, 2, 100, 1 ] - - Name: Out0 - Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut0 - Format: Bool - Stride: 4 - Data: [ 1, 0, 1, 1, 1, 1 ] - - Name: Out1 - Format: Bool - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut1 - Format: Bool - Stride: 4 - Data: [ 1, 0, 1, 1, 1, 1 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 -DescriptorSets: - - Resources: - - Name: In0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -#--- end - -# Bug https://github.com/llvm/offload-test-suite/issues/370 -# XFAIL: DXC && 
DirectX-Intel - -# REQUIRES: Int64 -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/asdouble.32.test b/test/Feature/HLSLLib/asdouble.32.test deleted file mode 100644 index 3cf90d41..00000000 --- a/test/Feature/HLSLLib/asdouble.32.test +++ /dev/null @@ -1,94 +0,0 @@ -#--- source.hlsl -StructuredBuffer Low : register(t0); -StructuredBuffer High : register(t1); - -RWStructuredBuffer Out : register(u2); - - -[numthreads(1,1,1)] -void main() { - Out[0] = asdouble(Low[0], High[0]); - Out[1] = double4(asdouble(Low[1].xyz, High[1].xyz), asdouble(Low[1].w, High[1].w)); - Out[2] = double4(asdouble(Low[2].xy, High[2].xy), asdouble(Low[2].zw, High[2].zw)); - Out[3] = asdouble(uint4(0, 0, 0, 3149642683), uint4(0, 1072693248, 3220176896, 2863311530)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: Low - Format: UInt32 - Stride: 16 - Data: [ - 0X00000000, 0x00000000, 0x00000000, 0xBBBBBBBB, - 0xDDDDDDDD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xAAAAAAAA, 0x00000000, 0xCDCDCDCD, 0x55555555 - ] - # 0, 0, 0, 3149642683, 3722304989, 4294967295, 4294967295, 4294967295, 2863311530, 0, 3452816845, 1431655765 - - Name: High - Format: UInt32 - Stride: 16 - Data: [ - 0X00000000, 0x3FF00000, 0xBFF00000, 0xAAAAAAAA, - 0xCCCCCCCC, 0xEEEEEEEE, 0x7FEFFFFF, 0xFFEFFFFF, - 0x00000000, 0x40900000, 0xABABABAB, 0x3FD55555 - ] - # 0, 1072693248, 3220176896, 2863311530, 3435973836, 4008636142, 2146435071, 4293918719, 0, 1083179008, 2880154539, 1070945621 - - Name: Out - Format: Float64 - Stride: 32 - ZeroInitSize: 128 - - Name: ExpectedOut - Format: Hex64 - Stride: 32 - Data: [ - 0X0000000000000000, 0x3FF0000000000000, 0xBFF0000000000000, 0xAAAAAAAABBBBBBBB, - 0xCCCCCCCCDDDDDDDD, 0xEEEEEEEEFFFFFFFF, 0x7FEFFFFFFFFFFFFF, 0xFFEFFFFFFFFFFFFF, - 0x00000000AAAAAAAA, 0x4090000000000000, 0xABABABABCDCDCDCD, 0x3FD5555555555555, 
- 0X0000000000000000, 0x3FF0000000000000, 0xBFF0000000000000, 0xAAAAAAAABBBBBBBB - ] - # 0, 1.0, -1.0, -3.72066e-103, -9.25596e61, -2.28999e226, 1.79769e+308, -1.79769e+308, denorm, 1024.0, -2.53017e-98, 0.33333, 0, 1.0, -1.0, -3.72066e-103 -Results: - - Result: Test - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: Low - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: High - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -#--- end - -# https://github.com/llvm/llvm-project/issues/153513 -# XFAIL: Clang-Vulkan - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7699 -# XFAIL: DXC-Vulkan - -# REQUIRES: Double -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/asfloat.test b/test/Feature/HLSLLib/asfloat.test deleted file mode 100644 index b50ed432..00000000 --- a/test/Feature/HLSLLib/asfloat.test +++ /dev/null @@ -1,136 +0,0 @@ -#--- source.hlsl -StructuredBuffer In0 : register(t0); -StructuredBuffer In1 : register(t1); -StructuredBuffer In2 : register(t2); - -RWStructuredBuffer Out0 : register(u3); -RWStructuredBuffer Out1 : register(u4); -RWStructuredBuffer Out2 : register(u5); - -[numthreads(1,1,1)] -void main() { - - Out0[0] = asfloat(In0[0]); - Out0[1] = float4(asfloat(In0[1].xyz), asfloat(In0[1].w)); - Out0[2] = float4(asfloat(In0[2].xy), asfloat(In0[2].zw)); - Out0[3] = asfloat(float4(0, 1, -50.555, 99.999)); - - Out1[0] = asfloat(In1[0]); - Out1[1] = float4(asfloat(In1[1].xyz), asfloat(In1[1].w)); - Out1[2] = float4(asfloat(In1[2].xy), asfloat(In1[2].zw)); - Out1[3] = asfloat(uint4(0, 50, 10000, 4294967295)); - - Out2[0] = asfloat(In2[0]); - Out2[1] = 
float4(asfloat(In2[1].xyz), asfloat(In2[1].w)); - Out2[2] = float4(asfloat(In2[2].xy), asfloat(In2[2].zw)); - Out2[3] = asfloat(int4(-1, 0, 50, 2147483647)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In0 - Format: Float32 - Stride: 16 - Data: [0, 1, -50.555, 99.999, -0.00001, 0.00001, 10.1, 20.2, 30.3, 40.4, 11111.111, 22.0002] - - Name: In1 - Format: UInt32 - Stride: 16 - Data: [0, 50, 10000, 4294967295, 1, 10, 100, 1000, 202, 303, 404, 505] - - Name: In2 - Format: Int32 - Stride: 16 - Data: [-1, 0, 50, 2147483647, 1, 10, 100, 1000, -202, -303, -404, -505] - - Name: Out0 - Format: Float32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut0 - Format: Float32 - Stride: 16 - Data: [0, 1, -50.555, 99.999, -0.00001, 0.00001, 10.1, 20.2, 30.3, 40.4, 11111.111, 22.0002, 0, 1, -50.555, 99.999] - - Name: Out1 - Format: Float32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut1 - Format: Float32 - Stride: 16 - Data: [ 0, 7.00649e-44, 1.4013e-41, -nan, 1.4013e-45, 1.4013e-44, 1.4013e-43, 1.4013e-42, 2.83062e-43, 4.24593e-43, 5.66125e-43, 7.07656e-43, 0, 7.00649e-44, 1.4013e-41, -nan ] - - Name: Out2 - Format: Float32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut2 - Format: Float32 - Stride: 16 - Data: [ -nan, 0, 7.00649e-44, nan, 1.4013e-45, 1.4013e-44, 1.4013e-43, 1.4013e-42, -nan, -nan, -nan, -nan, -nan, 0, 7.00649e-44, nan ] -Results: - - Result: Test0 - Rule: BufferFloatEpsilon - Epsilon: 0 - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0 - Actual: Out1 - Expected: ExpectedOut1 - - Result: Test2 - Rule: BufferFloatEpsilon - Epsilon: 0 - Actual: Out2 - Expected: ExpectedOut2 -DescriptorSets: - - Resources: - - Name: In0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - 
Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 -#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/asin.16.test b/test/Feature/HLSLLib/asin.16.test deleted file mode 100644 index be71ae14..00000000 --- a/test/Feature/HLSLLib/asin.16.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = asin(In[0]); - half4 Tmp = {asin(In[1].xyz), asin(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {asin(In[2].xy), asin(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x0001, 0x7c00, 0x3c00, 0xbc00, 0x3e00, 0xbe00, 0x7e00,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1, -1, 1.5, -1.5, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0x7e00, 0x0000, 0x0000, 0x0000, 0x0000, 0x7e00, 0x3e48, 0xbe48, 0x7e00, 0x7e00, 0x7e00,] - # NaN, NaN, 0.0, 0.0, 0.0, 0.0, NaN, 1.570796, -1.570796, NaN, NaN, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0.0008 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: 
RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/asin.32.test b/test/Feature/HLSLLib/asin.32.test deleted file mode 100644 index daa5c55d..00000000 --- a/test/Feature/HLSLLib/asin.32.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = asin(In[0]); - float4 Tmp = {asin(In[1].xyz), asin(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {asin(In[2].xy), asin(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, inf, 1, -1, 1.5, -1.5, nan,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1, -1, 1.5, -1.5, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, nan, 0.0, 0.0, 0.0, 0.0, nan, 1.570796, -1.570796, nan, nan, nan,] - # NaN, NaN, 0.0, 0.0, 0.0, 0.0, NaN, 1.570796, -1.570796, NaN, NaN, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0.0008 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/asint.test b/test/Feature/HLSLLib/asint.test deleted file mode 100644 index 480751a6..00000000 --- a/test/Feature/HLSLLib/asint.test +++ /dev/null @@ -1,144 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -StructuredBuffer In2 : register(t1); -StructuredBuffer In3 : register(t2); -RWStructuredBuffer Out1 : register(u3); -RWStructuredBuffer Out2 : register(u4); -RWStructuredBuffer Out3 : register(u5); - -[numthreads(1,1,1)] -void main() { - // int - Out1[0] = asint(int4(-1, 2, 0, 2147483647)); - Out1[1] = asint(In1[0]); - int4 Tmp = {asint(In1[0].xyz), asint(In1[0].w)}; - Out1[2] = Tmp; - Out1[3].xy = asint(In1[0].xy); - - // uint - Out2[0] = asint(uint4(2147483648, 0, 0xffffffff, 100)); - Out2[1] = asint(In2[0]); - int4 Tmp2 = {asint(In2[0].xyz), asint(In2[0].w)}; - Out2[2] = Tmp2; - Out2[3].xy = asint(In2[0].xy); - - // float - Out3[0] = asint(float4(10, 0, 5.57, 111.111)); - Out3[1] = asint(In3[0]); - int4 Tmp3 = {asint(In3[0].xyz), asint(In3[0].w)}; - Out3[2] = Tmp3; - Out3[3].xy = asint(In3[0].xy); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Int32 - Stride: 16 - Data: [-1, 0, -2147483648, 2147483647] - - Name: In2 - Format: UInt32 - Stride: 16 - Data: [0xffffffff, 0, 0x80000000, 0x7FFFFFFF] - # uint max, 0, 2147483648 , 2147483647 - - Name: In3 - Format: Float32 - Stride: 16 - Data: [10, 0.5, 0, 11.125] - - Name: Out1 - Format: Int32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut1 # The result we expect - Format: Int32 - Stride: 16 - Data: [-1, 2, 0, 2147483647, -1, 0, -2147483648, 2147483647, -1, 0, -2147483648, 2147483647, -1, 0, 0, 0, ] - - Name: Out2 - Format: Int32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut2 # The result we expect - Format: Int32 - Stride: 16 - 
Data: [-2147483648, 0, -1, 100, -1, 0, -2147483648, 2147483647, -1, 0, -2147483648, 2147483647, -1, 0, 0, 0] - - Name: Out3 - Format: Int32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut3 # The result we expect - Format: Int32 - Stride: 16 - Data: [ 1092616192, 0, 1085422961, 1121859797, 1092616192, 1056964608, 0, 1093795840, 1092616192, 1056964608, 0, 1093795840, 1092616192, 1056964608, 0, 0 ] -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 - - Result: Test2 - Rule: BufferExact - Actual: Out2 - Expected: ExpectedOut2 - - Result: Test3 - Rule: BufferExact - Actual: Out3 - Expected: ExpectedOut3 -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: In3 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Out3 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 -... 
-#--- end - -# https://github.com/llvm/llvm-project/issues/154214 -# XFAIL: Clang-Vulkan - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/asint16.fp16.test b/test/Feature/HLSLLib/asint16.fp16.test deleted file mode 100644 index bc609e41..00000000 --- a/test/Feature/HLSLLib/asint16.fp16.test +++ /dev/null @@ -1,62 +0,0 @@ -#--- source.hlsl -StructuredBuffer In0 : register(t0); -RWStructuredBuffer Out0 : register(u1); - -[numthreads(1,1,1)] -void main() { - - Out0[0] = asint16(In0[0]); - Out0[1] = int16_t4(asint16(In0[1].xyz), asint16(In0[1].w)); - Out0[2] = int16_t4(asint16(In0[2].xy), asint16(In0[2].zw)); - Out0[3] = asint16(half4(-5, 0, 10.111, 0.05)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In0 - Format: Float16 - Stride: 8 - Data: [0xc500, 0, 0x490e, 0x2a66, 0xbc00, 0x5640, 0xc500, 0, 0x490e, 0x2a66, 0xbc00, 0x5640] - # [-5, 0, 10.111, 0.05, -1, 100, -5, 0, 10.111, 0.05, -1, 100] - - Name: Out0 - Format: Int16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut0 - Format: Int16 - Stride: 8 - Data: [ -15104, 0, 18702, 10854, -17408, 22080, -15104, 0, 18702, 10854, -17408, 22080, -15104, 0, 18702, 10854 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 -DescriptorSets: - - Resources: - - Name: In0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -#--- end - -# REQUIRES: Int16 -# REQUIRES: Half - -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/asint16.int16.test b/test/Feature/HLSLLib/asint16.int16.test deleted file mode 100644 index c3420432..00000000 --- 
a/test/Feature/HLSLLib/asint16.int16.test +++ /dev/null @@ -1,96 +0,0 @@ -#--- source.hlsl -StructuredBuffer In0 : register(t0); -StructuredBuffer In1 : register(t1); -RWStructuredBuffer Out0 : register(u2); -RWStructuredBuffer Out1 : register(u3); - -[numthreads(1,1,1)] -void main() { - - Out0[0] = asint16(In0[0]); - Out0[1] = int16_t4(asint16(In0[1].xyz), asint16(In0[1].w)); - Out0[2] = int16_t4(asint16(In0[2].xy), asint16(In0[2].zw)); - Out0[3] = asint16(int16_t4(0, 32767, -100, -32768)); - - Out1[0] = asint16(In1[0]); - Out1[1] = int16_t4(asint16(In1[1].xyz), asint16(In1[1].w)); - Out1[2] = int16_t4(asint16(In1[2].xy), asint16(In1[2].zw)); - Out1[3] = asint16(uint16_t4(0, 100, 65535, 10)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In0 - Format: Int16 - Stride: 8 - Data: [0, 32767, -100, -32768, 10, 20, 30, 6000, -3000, -30000, 50, 60] - - Name: In1 - Format: UInt16 - Stride: 8 - Data: [0, 100, 65535, 10, 20, 30, 40, 50, 60, 70, 80, 6000] - - Name: Out0 - Format: Int16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut0 - Format: Int16 - Stride: 8 - Data: [0, 32767, -100, -32768, 10, 20, 30, 6000, -3000, -30000, 50, 60, 0, 32767, -100, -32768] - - Name: Out1 - Format: Int16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut1 - Format: Int16 - Stride: 8 - Data: [ 0, 100, -1, 10, 20, 30, 40, 50, 60, 70, 80, 6000, 0, 100, -1, 10 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 -DescriptorSets: - - Resources: - - Name: In0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out1 - Kind: 
RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -#--- end - -# REQUIRES: Int16 -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/asuint.32.test b/test/Feature/HLSLLib/asuint.32.test deleted file mode 100644 index c72a989c..00000000 --- a/test/Feature/HLSLLib/asuint.32.test +++ /dev/null @@ -1,133 +0,0 @@ -#--- source.hlsl -StructuredBuffer In0 : register(t0); -StructuredBuffer In1 : register(t1); -StructuredBuffer In2 : register(t2); - -RWStructuredBuffer Out0 : register(u3); -RWStructuredBuffer Out1 : register(u4); -RWStructuredBuffer Out2 : register(u5); - -[numthreads(1,1,1)] -void main() { - - Out0[0] = asuint(In0[0]); - Out0[1] = uint4(asuint(In0[1].xyz), asuint(In0[1].w)); - Out0[2] = uint4(asuint(In0[2].xy), asuint(In0[2].zw)); - Out0[3] = asuint(uint4(100, 0, 4294967295, 75757575)); - - Out1[0] = asuint(In1[0]); - Out1[1] = uint4(asuint(In1[1].xyz), asuint(In1[1].w)); - Out1[2] = uint4(asuint(In1[2].xy), asuint(In1[2].zw)); - Out1[3] = asuint(int4(-1, 0, 2147483647, -2147483648)); - - Out2[0] = asuint(In2[0]); - Out2[1] = uint4(asuint(In2[1].xyz), asuint(In2[1].w)); - Out2[2] = uint4(asuint(In2[2].xy), asuint(In2[2].zw)); - Out2[3] = asuint(float4(10, 0, 5.57, 111.111)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In0 - Format: UInt32 - Stride: 16 - Data: [100, 0, 4294967295, 75757575, 10, 20, 30, 40 , 50, 60, 70, 80] - - Name: In1 - Format: Int32 - Stride: 16 - Data: [-1, 0, 2147483647, -2147483648, -10, -20, -30, -40, -50, 60, 70, 80] - - Name: In2 - Format: Float32 - Stride: 16 - Data: [10, 0, 5.57, 111.111, 20.5, 30.6, 40.7, 50.8, -60.9, -0.000001, -0.0002, -888.888] - - Name: Out0 - Format: UInt32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut0 - Format: UInt32 - Stride: 16 - Data: [100, 0, 4294967295, 
75757575, 10, 20, 30, 40 , 50, 60, 70, 80, 100, 0, 4294967295, 75757575] - - Name: Out1 - Format: UInt32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut1 - Format: UInt32 - Stride: 16 - Data: [ 4294967295, 0, 2147483647, 2147483648, 4294967286, 4294967276, 4294967266, 4294967256, 4294967246, 60, 70, 80, 4294967295, 0, 2147483647, 2147483648] - - Name: Out2 - Format: UInt32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut2 - Format: UInt32 - Stride: 16 - Data: [ 1092616192, 0, 1085422961, 1121859797, 1101266944, 1106562253, 1109576909, 1112224563, 3262355866, 3045472189, 3109140247, 3294509269, 1092616192, 0, 1085422961, 1121859797 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 - - Result: Test2 - Rule: BufferExact - Actual: Out2 - Expected: ExpectedOut2 -DescriptorSets: - - Resources: - - Name: In0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 -#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/asuint.64.test b/test/Feature/HLSLLib/asuint.64.test deleted file mode 100644 index e8fd2fa9..00000000 --- a/test/Feature/HLSLLib/asuint.64.test +++ /dev/null @@ -1,93 +0,0 @@ -#--- source.hlsl -StructuredBuffer In0 : 
register(t0); -RWStructuredBuffer Lows : register(u1); -RWStructuredBuffer Highs : register(u2); - -[numthreads(1,1,1)] -void main() { - - asuint(In0[0], Lows[0], Highs[0]); - uint4 Tmp0; - uint4 Tmp1; - asuint(In0[1].xyz, Tmp0.xyz, Tmp1.xyz); - asuint(In0[1].w, Tmp0.w, Tmp1.w); - Lows[1] = Tmp0; - Highs[1]= Tmp1; - asuint(In0[2].xy, Tmp0.xy, Tmp1.xy); - asuint(In0[2].zw, Tmp0.zw, Tmp1.zw); - Lows[2] = Tmp0; - Highs[2] = Tmp1; - asuint(double4(10, 0, 5.57, 111.111), Lows[3], Highs[3]); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In0 - Format: Float64 - Stride: 32 - Data: [10, 0, 5.57, 111.111, 20.5, 30.6, 40.7, 50.8, -60.9, -0.000001, -0.0002, -888.888] - - Name: Lows - Format: UInt32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedLows - Format: UInt32 - Stride: 16 - Data: [ 0, 0, 343597384, 2680059593, 0, 2576980378, 2576980378, 1717986918, 858993459, 2696277389, 3944497965, 2680059593, 0, 0, 536870912, 2684354560 ] - - Name: Highs - Format: UInt32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedHighs - Format: UInt32 - Stride: 16 - Data: [ 1076101120, 0, 1075201966, 1079756570, 1077182464, 1077844377, 1078221209, 1078552166, 3226366771, 3199256311, 3207214818, 3230385946, 1076101120, 0, 1075201966, 1079756570 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Lows - Expected: ExpectedLows - - Result: Test1 - Rule: BufferExact - Actual: Highs - Expected: ExpectedHighs -DescriptorSets: - - Resources: - - Name: In0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Lows - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Highs - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -#--- end - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7666 -# XFAIL: DXC-Vulkan - -# 
https://github.com/llvm/llvm-project/issues/153091 -# XFAIL: Clang-Vulkan - -# REQUIRES: Double -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -HV 202x -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/asuint16.fp16.test b/test/Feature/HLSLLib/asuint16.fp16.test deleted file mode 100644 index 6d35f6dc..00000000 --- a/test/Feature/HLSLLib/asuint16.fp16.test +++ /dev/null @@ -1,61 +0,0 @@ -#--- source.hlsl -StructuredBuffer In0 : register(t0); -RWStructuredBuffer Out0 : register(u1); - -[numthreads(1,1,1)] -void main() { - - Out0[0] = asuint16(In0[0]); - Out0[1] = uint16_t4(asuint16(In0[1].xyz), asuint16(In0[1].w)); - Out0[2] = uint16_t4(asuint16(In0[2].xy), asuint16(In0[2].zw)); - Out0[3] = asuint16(half4(-5, 0, 10.111, 0.05)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In0 - Format: Float16 - Stride: 8 - Data: [0xc500, 0, 0x490e, 0x2a66, 0xbc00, 0x5640, 0xc500, 0, 0x490e, 0x2a66, 0xbc00, 0x5640] - # [-5, 0, 10.111, 0.05, -1, 100, -5, 0, 10.111, 0.05, -1, 100] - - Name: Out0 - Format: UInt16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut0 - Format: UInt16 - Stride: 8 - Data: [ 50432, 0, 18702, 10854, 48128, 22080, 50432, 0, 18702, 10854, 48128, 22080, 50432, 0, 18702, 10854 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 -DescriptorSets: - - Resources: - - Name: In0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -#--- end - -# REQUIRES: Int16 -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -enable-16bit-types -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/asuint16.int16.test b/test/Feature/HLSLLib/asuint16.int16.test deleted file mode 100644 index 
8cffe691..00000000 --- a/test/Feature/HLSLLib/asuint16.int16.test +++ /dev/null @@ -1,96 +0,0 @@ -#--- source.hlsl -StructuredBuffer In0 : register(t0); -StructuredBuffer In1 : register(t1); -RWStructuredBuffer Out0 : register(u2); -RWStructuredBuffer Out1 : register(u3); - -[numthreads(1,1,1)] -void main() { - - Out0[0] = asuint16(In0[0]); - Out0[1] = uint16_t4(asuint16(In0[1].xyz), asuint16(In0[1].w)); - Out0[2] = uint16_t4(asuint16(In0[2].xy), asuint16(In0[2].zw)); - Out0[3] = asuint16(uint16_t4(0, 100, 65535, 10)); - - Out1[0] = asuint16(In1[0]); - Out1[1] = uint16_t4(asuint16(In1[1].xyz), asuint16(In1[1].w)); - Out1[2] = uint16_t4(asuint16(In1[2].xy), asuint16(In1[2].zw)); - Out1[3] = asuint16(int16_t4(0, 32767, -100, -32768)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In0 - Format: UInt16 - Stride: 8 - Data: [0, 100, 65535, 10, 20, 30, 40, 50, 60, 70, 80, 6000] - - Name: In1 - Format: Int16 - Stride: 8 - Data: [0, 32767, -100, -32768, 10, 20, 30, 6000, -3000, -30000, 50, 60] - - Name: Out0 - Format: UInt16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut0 - Format: UInt16 - Stride: 8 - Data: [0, 100, 65535, 10, 20, 30, 40, 50, 60, 70, 80, 6000, 0, 100, 65535, 10] - - Name: Out1 - Format: UInt16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut1 - Format: UInt16 - Stride: 8 - Data: [ 0, 32767, 65436, 32768, 10, 20, 30, 6000, 62536, 35536, 50, 60, 0, 32767, 65436, 32768 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 -DescriptorSets: - - Resources: - - Name: In0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - 
VulkanBinding: - Binding: 2 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -#--- end - -# REQUIRES: Int16 -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/atan.16.test b/test/Feature/HLSLLib/atan.16.test deleted file mode 100644 index ca9d6bda..00000000 --- a/test/Feature/HLSLLib/atan.16.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = atan(In[0]); - half4 Tmp = {atan(In[1].xyz), atan(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {atan(In[2].xy), atan(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x0001, 0x7c00, 0x3c00, 0xbc00, 0x7e00, 0x7e00, 0x7e00,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1, -1, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xbe48, 0x0000, 0x0000, 0x0000, 0x0000, 0x3e48, 0x3a48, 0xba48, 0x7e00, 0x7e00, 0x7e00,] - # NaN, -1.570796, 0.0, 0.0, 0.0, 0.0, 1.570796, 0.785398163, -0.785398163, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0.0008 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/atan.32.test b/test/Feature/HLSLLib/atan.32.test deleted file mode 100644 index 560d41a8..00000000 --- a/test/Feature/HLSLLib/atan.32.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = atan(In[0]); - float4 Tmp = {atan(In[1].xyz), atan(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {atan(In[2].xy), atan(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, inf, 1, -1, nan, nan, nan,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1, -1, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, -1.570796, 0.0, 0.0, 0.0, 0.0, 1.570796, 0.785398163, -0.785398163, nan, nan, nan,] - # NaN, -1.570796, 0.0, 0.0, 0.0, 0.0, 1.570796, 0.785398163, -0.785398163, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0.0008 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/atan2.16.test b/test/Feature/HLSLLib/atan2.16.test deleted file mode 100644 index a61bb524..00000000 --- a/test/Feature/HLSLLib/atan2.16.test +++ /dev/null @@ -1,78 +0,0 @@ -#--- source.hlsl -StructuredBuffer In0 : register(t0); -StructuredBuffer In1 : register(t1); - -RWStructuredBuffer Out0 : register(u2); - -[numthreads(1,1,1)] -void main() { - Out0[0] = atan2(In0[0], In1[0]); - Out0[1] = half4(atan2(In0[1].xyz, In1[1].xyz), atan2(In0[1].w, In1[1].w)); - Out0[2] = half4(atan2(In0[2].xy, In1[2].xy), atan2(In0[2].zw, In1[2].zw)); - Out0[3] = atan2(half4(10.0, -5.0, 20.25, 1), half4(2, -1, 0.25, 2)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In0 - Format: Float16 - Stride: 8 - Data: [0x4900, 0xc500, 0x4d10, 0x3c00, 0x4d20, 0x4fa0, 0x5110, 0x5250, 0xb400, 0xcd00, 0xcf80, 0x3400] - # 10, -5, 20.25, 1, 20.5, 30.5, 40.5, 50.5, -0.25, -20, -30, 0.25 - - Name: In1 - Format: Float16 - Stride: 8 - Data: [0x4000, 0xbc00, 0x3400, 0x4000, 0x4580, 0x4680, 0x4780, 0x4840, 0xd640, 0xcd00, 0xcf80, 0x3400] - # 2, -1, 0.25, 2, 5.5, 6.5, 7.5, 8.5, -100, -20, -30, 0.25 - - Name: Out0 - Format: Float16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut0 - Format: Float16 - Stride: 8 - Data: [ 0x3D7E, 0xBF12, 0x3E3C, 0x376B, 0x3D3C, 0x3D71, 0x3D8D, 0x3D9E, 0xC247, 0xC0B6, 0xC0B6, 0x3A48, 0x3D7E, 0xBF12, 0x3E3C, 0x376B ] - # 1.3730469, -1.7675781, 1.5585938, 0.46362305, 1.3085938, 1.3603516, 1.3876953, 1.4042969, -3.1386719, -2.3554688, -2.3554688, 0.78515625, 1.3730469, -1.7675781, 1.5585938, 0.46362305 -Results: - - Result: Test0 - Rule: BufferFloatEpsilon - Epsilon: 0.003 - Actual: Out0 - Expected: ExpectedOut0 -DescriptorSets: - - Resources: - - Name: In0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: 
- Binding: 0 - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -#--- end - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7691 -# XFAIL: DXC-Vulkan - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/atan2.32.test b/test/Feature/HLSLLib/atan2.32.test deleted file mode 100644 index 84884442..00000000 --- a/test/Feature/HLSLLib/atan2.32.test +++ /dev/null @@ -1,72 +0,0 @@ -#--- source.hlsl -StructuredBuffer In0 : register(t0); -StructuredBuffer In1 : register(t1); - -RWStructuredBuffer Out0 : register(u2); - -[numthreads(1,1,1)] -void main() { - Out0[0] = atan2(In0[0], In1[0]); - Out0[1] = float4(atan2(In0[1].xyz, In1[1].xyz), atan2(In0[1].w, In1[1].w)); - Out0[2] = float4(atan2(In0[2].xy, In1[2].xy), atan2(In0[2].zw, In1[2].zw)); - Out0[3] = atan2(float4(10.0, -5.0, 20.25, 1), float4(2, -1, 0.25, 2)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In0 - Format: Float32 - Stride: 16 - Data: [10.0, -5.0, 20.25, 1, 20.2, 30.3, 40.4, 50.5, -0.25, -20, -30, -400] - - Name: In1 - Format: Float32 - Stride: 16 - Data: [2, -1, 0.25, 2, 5.5, 6.6, 7.7, 8.8, -100, -20, -30, 0.25] - - Name: Out0 - Format: Float32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut0 - Format: Float32 - Stride: 16 - Data: [ 1.3734, -1.76819, 1.55845, 0.463648, 1.30496, 1.35632, 1.38246, 1.39827, -3.13909, -2.35619, -2.35619, -1.57017, 1.3734, -1.76819, 1.55845, 0.463648 ] -Results: - - Result: Test0 - Rule: BufferFloatEpsilon - Epsilon: 0.0008 - Actual: Out0 - Expected: ExpectedOut0 -DescriptorSets: - - Resources: - - Name: In0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - 
Space: 0 - VulkanBinding: - Binding: 0 - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/ceil.16.test b/test/Feature/HLSLLib/ceil.16.test deleted file mode 100644 index 5f4818c0..00000000 --- a/test/Feature/HLSLLib/ceil.16.test +++ /dev/null @@ -1,68 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = ceil(In[0]); - half4 Tmp = {ceil(In[1].xyz), ceil(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {ceil(In[2].xy), ceil(In[2].zw)}; - Out[2] = Tmp2; - Out[3] = ceil(In[3]); -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x0001, 0x7c00, 0x4900, 0x4933, 0x4940, 0x494d, 0xc900, 0xc933, 0xc940, 0xc94d, 0x7e00,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 10.0, 10.4, 10.5, 10.6, -10.0, -10.4, -10.5, -10.6, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8000, 0x8000, 0x0000, 0x3c00, 0x7c00, 0x4900, 0x4980, 0x4980, 0x4980, 0xc900, 0xc900, 0xc900, 0xc900, 0x7e00,] - # NaN, -Inf, -0, -0, 0, 1, Inf, 10.0, 11.0, 11.0, 11.0, -10.0, -10.0, -10.0, -10.0, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - 
Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/ceil.32.test b/test/Feature/HLSLLib/ceil.32.test deleted file mode 100644 index 630868cf..00000000 --- a/test/Feature/HLSLLib/ceil.32.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = ceil(In[0]); - float4 Tmp = {ceil(In[1].xyz), ceil(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {ceil(In[2].xy), ceil(In[2].zw)}; - Out[2] = Tmp2; - Out[3] = ceil(In[3]); -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0x1.e7d42cp-127, -0, 0, 0, inf, 10.0, 10.4, 10.5, 10.6, -10.0, -10.4, -10.5, -10.6, nan,] - # NaN, -Inf, -denorm, -0, 0, 0, Inf, 10.0, 10.4, 10.5, 10.6, -10.0, -10.4, -10.5, -10.6, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0, -0, 0, 0, inf, 10.0, 11.0, 11.0, 11.0, -10.0, -10.0, -10.0, -10.0, nan,] - # NaN, -Inf, -0, -0, 0, 0, Inf, 10.0, 11.0, 11.0, 11.0, -10.0, -10.0, -10.0, -10.0, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/cos.16.test b/test/Feature/HLSLLib/cos.16.test deleted file mode 100644 index a7c68403..00000000 --- a/test/Feature/HLSLLib/cos.16.test +++ /dev/null @@ -1,69 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = cos(In[0]); - half4 Tmp = {cos(In[1].xyz), cos(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {cos(In[2].xy), cos(In[2].zw)}; - Out[2] = Tmp2; - Out[3] = cos(In[3]); - Out[4] = cos(In[4]); -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x0001, 0x7c00, 0x3906, 0x3d06, 0x3f8a, 0x4106, 0x4248, 0x438a, 0x4466, 0x4506, 0x45a7, 0x4648, 0x7e00, 0x7e00, 0x7e00,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 0.6279297, 1.255859, 1.884766, 2.511719, 3.140625, 3.769531, 4.398438, 5.023438, 5.652344, 6.281250, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 40 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0x7e00, 0x3c00, 0x3c00, 0x3c00, 0x3c00, 0x7e00, 0x3a79, 0x34f5, 0xb4f1, 0xba77, 0xbc00, 0xba79, 0xb4f1, 0x34e6, 0x3a76, 0x3c00, 0x7e00, 0x7e00, 0x7e00,] - # NaN, NaN, 1.0, 1.0, 1.0, 1.0, NaN, 0.80924553, 0.30975693, -0.30883664, -0.80810183, -0.99999952, -0.80924052, -0.30881903, 0.30605716, 0.80753154, 0.99999809, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0.003 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/cos.32.test b/test/Feature/HLSLLib/cos.32.test deleted file mode 100644 index eb826f20..00000000 --- a/test/Feature/HLSLLib/cos.32.test +++ /dev/null @@ -1,69 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = cos(In[0]); - float4 Tmp = {cos(In[1].xyz), cos(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {cos(In[2].xy), cos(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, inf, -314.16, 314.16, nan, nan, nan,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, -314.16, 314.16, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, nan, 1.0, 1.0, 1.0, 1.0, nan, 0.99999973015, 0.99999973015, nan, nan, nan,] - # NaN, NaN, 1.0, 1.0, 1.0, 1.0, NaN, 0.99999973015, 0.99999973015, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0.0008 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# https://github.com/KhronosGroup/SPIRV-Cross/issues/2525 -# XFAIL: Vulkan-Darwin - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/cosh.16.test b/test/Feature/HLSLLib/cosh.16.test deleted file mode 100644 index 6e59be7c..00000000 --- a/test/Feature/HLSLLib/cosh.16.test +++ /dev/null @@ -1,70 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = cosh(In[0]); - half4 Tmp = {cosh(In[1].xyz), cosh(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {cosh(In[2].xy), cosh(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x0001, 0x7c00, 0x3c00, 0xbc00, 0x7e00, 0x7e00, 0x7e00,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1, -1, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0x7c00, 0x3c00, 0x3c00, 0x3c00, 0x3c00, 0x7c00, 0x3e2c, 0x3e2c, 0x7e00, 0x7e00, 0x7e00,] - # NaN, Inf, 1.0, 1.0, 1.0, 1.0, Inf, 1.543081, 1.543081, -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 2 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# https://github.com/KhronosGroup/SPIRV-Cross/issues/2507 -# XFAIL: Vulkan-Darwin - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/cosh.32.test b/test/Feature/HLSLLib/cosh.32.test deleted file mode 100644 index b1b6c51d..00000000 --- a/test/Feature/HLSLLib/cosh.32.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = cosh(In[0]); - float4 Tmp = {cosh(In[1].xyz), cosh(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {cosh(In[2].xy), cosh(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, inf, 1, -1, nan, nan, nan,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1, -1, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, inf, 1.0, 1.0, 1.0, 1.0, inf, 1.543081, 1.543081, nan, nan, nan,] - # NaN, Inf, 1.0, 1.0, 1.0, 1.0, Inf, 1.543081, 1.543081, -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 4 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/countbits.16.test b/test/Feature/HLSLLib/countbits.16.test deleted file mode 100644 index 2b6ea97e..00000000 --- a/test/Feature/HLSLLib/countbits.16.test +++ /dev/null @@ -1,135 +0,0 @@ -#--- source.hlsl - -StructuredBuffer InInt : register(t0); -StructuredBuffer InUInt : register(t1); - -// Explicitly using a size 3 buffer illustrates an error when using warp -StructuredBuffer InIntWarp : register(t4); -StructuredBuffer InUIntWarp : register(t5); - -RWStructuredBuffer OutInt : register(u2); -RWStructuredBuffer OutUInt : register(u3); - -[numthreads(1,1,1)] -void main() { - // Int - OutInt[0] = countbits(InInt[0]); - uint4 OutOne = {countbits(InIntWarp[0]), countbits(InUInt[0].w)}; - OutInt[1] = OutOne; - OutInt[2].xy = countbits(InInt[0].xy); - - // UInt - OutUInt[0] = countbits(InUInt[0]); - uint4 OutUOne = {countbits(InUIntWarp[0]), countbits(InUInt[0].w)}; - OutUInt[1] = OutUOne; - OutUInt[2].xy = countbits(InUInt[0].xy); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: InInt - Format: Int16 - Stride: 8 - Data: [0x2A, 0, 0x0f0f, -1] - - Name: InUInt - Format: UInt16 - Stride: 8 - Data: [0x2A, 0, 0x0f0f, 0xffff] - - Name: InIntWarp - Format: Int16 - Stride: 6 - Data: [0, 0x0f0f, -1] - - Name: InUIntWarp - Format: UInt16 - Stride: 6 - Data: [0, 0x0f0f, 0xffff] - - Name: OutInt - Format: UInt32 - Stride: 16 - ZeroInitSize: 48 - - Name: OutUInt - Format: UInt32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: UInt32 - Stride: 16 - Data: [3, 0, 8, 16, 3, 0, 8, 16, 3, 0, 0, 0] -Results: - - Result: Test1 - Rule: BufferExact - Actual: OutInt - Expected: ExpectedOut - - Result: Test2 - Rule: BufferExact - Actual: OutUInt - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: InInt - 
Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: InUInt - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: InIntWarp - Kind: StructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: InUIntWarp - Kind: StructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 - - Name: OutInt - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: OutUInt - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -... -#--- end - -# REQUIRES: Int16 - -# Bug in DXC-Vulkan -# https://github.com/microsoft/DirectXShaderCompiler/issues/7494 -# Bug in Clang-Vulkan -# https://github.com/llvm/llvm-project/issues/142677 -# However, the behaviour is also inconsistent across GPUs when using directx -# and requires an HLK test so we will mark everything as UNSUPPORTED. 
-# For more context see here: -# https://github.com/microsoft/DirectXShaderCompiler/issues/7499 -# UNSUPPORTED: DXC -# UNSUPPORTED: Clang - -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/countbits.32.test b/test/Feature/HLSLLib/countbits.32.test deleted file mode 100644 index 3603e13a..00000000 --- a/test/Feature/HLSLLib/countbits.32.test +++ /dev/null @@ -1,95 +0,0 @@ -#--- source.hlsl - -StructuredBuffer InInt : register(t0); -StructuredBuffer InUInt : register(t1); -RWStructuredBuffer OutInt : register(u2); -RWStructuredBuffer OutUInt : register(u3); - -[numthreads(1,1,1)] -void main() { - // Int - OutInt[0] = countbits(InInt[0]); - uint4 OutOne = {countbits(InInt[0].xyz), countbits(InInt[0].w)}; - OutInt[1] = OutOne; - OutInt[2].xy = countbits(InInt[0].xy); - - // UInt - OutUInt[0] = countbits(InUInt[0]); - uint4 OutUOne = {countbits(InUInt[0].xyz), countbits(InUInt[0].w)}; - OutUInt[1] = OutUOne; - OutUInt[2].xy = countbits(InUInt[0].xy); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: InInt - Format: Int32 - Stride: 16 - Data: [0x2A, 0, 0x0f0f0f0f, -1] - - Name: InUInt - Format: UInt32 - Stride: 16 - Data: [0x2A, 0, 0x0f0f0f0f, 0xffffffff] - - Name: OutInt - Format: UInt32 - Stride: 16 - ZeroInitSize: 48 - - Name: OutUInt - Format: UInt32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: UInt32 - Stride: 16 - Data: [3, 0, 16, 32, 3, 0, 16, 32, 3, 0, 0, 0] -Results: - - Result: Test1 - Rule: BufferExact - Actual: OutInt - Expected: ExpectedOut - - Result: Test2 - Rule: BufferExact - Actual: OutUInt - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: InInt - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: InUInt - Kind: StructuredBuffer - DirectXBinding: - 
Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: OutInt - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: OutUInt - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -... -#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/countbits.64.test b/test/Feature/HLSLLib/countbits.64.test deleted file mode 100644 index 4cf6674e..00000000 --- a/test/Feature/HLSLLib/countbits.64.test +++ /dev/null @@ -1,102 +0,0 @@ -#--- source.hlsl - -StructuredBuffer InInt : register(t0); -StructuredBuffer InUInt : register(t1); - -RWStructuredBuffer OutInt : register(u2); -RWStructuredBuffer OutUInt : register(u3); - -[numthreads(1,1,1)] -void main() { - // Int - OutInt[0] = countbits(InInt[0]); - uint4 OutOne = {countbits(InInt[0].xyz), countbits(InInt[0].w)}; - OutInt[1] = OutOne; - OutInt[2].xy = countbits(InInt[0].xy); - - // UInt - OutUInt[0] = countbits(InUInt[0]); - uint4 OutUOne = {countbits(InUInt[0].xyz), countbits(InUInt[0].w)}; - OutUInt[1] = OutUOne; - OutUInt[2].xy = countbits(InUInt[0].xy); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: InInt - Format: Int64 - Stride: 32 - Data: [0x2A, 0, 0x0f0f0f0f0f0f0f0f, -1] - - Name: InUInt - Format: UInt64 - Stride: 32 - Data: [0x2A, 0, 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff] - - Name: OutInt - Format: UInt32 - Stride: 16 - ZeroInitSize: 48 - - Name: OutUInt - Format: UInt32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: UInt32 - Stride: 16 - Data: [3, 0, 32, 64, 3, 0, 32, 64, 3, 0, 0, 0] -Results: - - Result: Test1 - Rule: BufferExact - Actual: OutInt - Expected: ExpectedOut - - Result: Test2 - Rule: BufferExact - Actual: OutUInt - Expected: ExpectedOut -DescriptorSets: - - Resources: - - 
Name: InInt - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: InUInt - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: OutInt - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: OutUInt - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -... -#--- end - -# REQUIRES: Int64 - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7494 -# https://github.com/llvm/llvm-project/issues/142677 -# UNSUPPORTED: Vulkan - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/cross.16.test b/test/Feature/HLSLLib/cross.16.test deleted file mode 100644 index 13ebc148..00000000 --- a/test/Feature/HLSLLib/cross.16.test +++ /dev/null @@ -1,77 +0,0 @@ -#--- source.hlsl -StructuredBuffer X : register(t0); -StructuredBuffer Y : register(t1); - -RWStructuredBuffer Out : register(u2); - - -[numthreads(1,1,1)] -void main() { - // Only accepts vectors of length 3 - Out[0] = half4(cross(X[0].xyz, Y[0].xyz), 0); - Out[1] = half4(cross(X[1].xyz, Y[1].xyz), 0); - Out[2] = half4(cross(X[2].xyz, Y[2].xyz), 0); - Out[3] = half4(cross(half3(1, 0, 0), half3(0, 1, 0)), 0); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X - Format: Float16 - Stride: 8 - Data: [ 0x3c00, 0x0000, 0x0000, 0x0000, 0x4000, 0x4200, 0x4400, 0x0000, 0xbd00, 0xc100, 0xc200, 0x0000 ] # Every 4th value is filler - # 1, 0, 0, 0, 2, 3, 4, 0, -1.25, -2.5, -3, 0 - - Name: Y - Format: Float16 - Stride: 8 - Data: [ 0x0000, 0x3c00, 0x0000, 0x0000, 0x4400, 0x4600, 0x4800, 0x0000, 0x4440, 0x4500, 0x46c0, 0x0000 ] # Every 4th value is filler - # 0, 1, 0, 0, 4, 6, 8, 0, 4.25, 5, 6.75, 0 - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 
32 - - Name: ExpectedOut - Format: Float16 - Stride: 8 - Data: [ 0x0000, 0x0000, 0x3c00, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xbf80, 0xc450, 0x4460, 0x0000, 0x0000, 0x0000, 0x3c00, 0x0000 ] # Every 4th value is filler - # 0, 0, 1, 0, 0, 0, 0, 0, -1.875, -4.3125, 4.375, 0, 0, 0, 1, 0 -Results: - - Result: Test0 - Rule: BufferFloatULP - ULPT: 2 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: X - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/cross.32.test b/test/Feature/HLSLLib/cross.32.test deleted file mode 100644 index 30e27ae9..00000000 --- a/test/Feature/HLSLLib/cross.32.test +++ /dev/null @@ -1,73 +0,0 @@ -#--- source.hlsl -StructuredBuffer X : register(t0); -StructuredBuffer Y : register(t1); - -RWStructuredBuffer Out : register(u2); - - -[numthreads(1,1,1)] -void main() { - // Only accepts vectors of length 3 - Out[0] = float4(cross(X[0].xyz, Y[0].xyz), 0); - Out[1] = float4(cross(X[1].xyz, Y[1].xyz), 0); - Out[2] = float4(cross(X[2].xyz, Y[2].xyz), 0); - Out[3] = float4(cross(float3(1, 0, 0), float3(0, 1, 0)), 0); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X - Format: Float32 - Stride: 16 - Data: [ 1, 0, 0, 0, 2, 3, 4, 0, -1.25, -2.5, -3, 0 ] # Every 4th value is filler - - Name: Y - Format: Float32 - Stride: 16 - Data: [ 0, 1, 0, 0, 4, 6, 8, 0, 4.25, 5, 6.75, 0 ] # Every 4th value is filler - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 64 - - Name: 
ExpectedOut - Format: Float32 - Stride: 16 - Data: [ 0, 0, 1, 0, 0, 0, 0, 0, -1.875, -4.3125, 4.375, 0, 0, 0, 1, 0 ] # Every 4th value is filler -Results: - - Result: Test0 - Rule: BufferFloatEpsilon - Epsilon: 0.0008 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: X - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/degrees.16.test b/test/Feature/HLSLLib/degrees.16.test deleted file mode 100644 index 194e77ca..00000000 --- a/test/Feature/HLSLLib/degrees.16.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = degrees(In[0]); - half4 Tmp = {degrees(In[1].xyz), degrees(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {degrees(In[2].xy), degrees(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x0001, 0x7c00, 0x3c00, 0xbc00, 0x4000, 0xc248, 0x4248,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1, -1, 2, -3.1415926, 3.1415926, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8039, 0x8000, 0x0000, 0x0039, 0x7c00, 0x5329, 0xd329, 0x5729, 0xd99f, 0x599f,] - # NaN, -inf, --0.0000033974648, -0, 0, 0.0000033974648, inf, 57.295779, -57.295779, 114.592, -180, 180, -Results: - - Result: 
Test1 - Rule: BufferFloatULP - ULPT: 2 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/degrees.32.test b/test/Feature/HLSLLib/degrees.32.test deleted file mode 100644 index 133af8b5..00000000 --- a/test/Feature/HLSLLib/degrees.32.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = degrees(In[0]); - float4 Tmp = {degrees(In[1].xyz), degrees(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {degrees(In[2].xy), degrees(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, inf, 1, -1, 2, -3.1415926, 3.1415926,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1, -1, 2, -3.1415926, 3.1415926, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0, -0, 0, 0, inf, 0x1.ca5dc20000000p+5, -0x1.ca5dc20000000p+5, 0x1.ca5dc20000000p+6, -180, 180,] - # nan, -inf, -0, -0, 0, 0, inf, 57.2958, -57.2958, 114.591, -180, 180 -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 2 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: 
- Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/distance.16.test b/test/Feature/HLSLLib/distance.16.test deleted file mode 100644 index 2b9414dc..00000000 --- a/test/Feature/HLSLLib/distance.16.test +++ /dev/null @@ -1,127 +0,0 @@ -#--- source.hlsl - -// This test tests four different distance scenarios -// One in 1D, 2D, 3D, and 4D - -StructuredBuffer X : register(t0); -StructuredBuffer Y : register(t1); - -RWStructuredBuffer Result : register(u2); - -[numthreads(1,1,1)] -void main() { - // distance ({1.125}, {2.375}) = 1.25 - half R0 = distance(X[0].x, Y[0].x); - Result[0] = R0; - half R0_constant = distance(half(1.125), half(2.375)); - Result[1] = R0_constant; - - // distance({1.125, 2.5}, {2.375, 5.25}) = 3.02076 - half R1 = distance(X[0].xy, Y[0].xy); - Result[2] = R1; - half R1_constant = distance(half2(1.125, 2.5), half2(2.375, 5.25)); - Result[3] = R1_constant; - - // distance({1.125, 2.5, 4.75}, {2.375, 5.25, 8.375}) = 4.71865 - half R2 = distance(X[0].xyz, Y[0].xyz); - Result[4] = R2; - half R2_constant = distance(half3(1.125, 2.5, 4.75), half3(2.375, 5.25, 8.375)); - Result[5] = R2_constant; - - // distance({1.125, 2.5, 4.75, 6.625}, {2.375, 5.25, 8.375, 5.30}) = 4.90115 - half R3 = distance(X[0], Y[0]); - Result[6] = R3; - half R3_constant = distance(half4(1.125, 2.5, 4.75, 6.625), half4(2.375, 5.25, 8.375, 5.30)); - Result[7] = R3_constant; - - // distance ({-7.29}, {-12.29}) = 5.0 - half R4 = distance(X[1].x, Y[1].x); - Result[8] = R4; - half R4_constant = distance(half(-7.29), half(-12.29)); - Result[9] = R4_constant; - - // distance({-7.29, 137.14}, {-12.29, -4.0}) = 141.2303 - half R5 = distance(X[1].xy, Y[1].xy); - Result[10] = R5; - half R5_constant = distance(half2(-7.29, 137.14), half2(-12.29, -4.0)); - Result[11] = R5_constant; - - // distance({-7.29, 137.14, 1.1}, 
{-12.29, -4.0, -2.1}) = 141.2406 - half R6 = distance(X[1].xyz, Y[1].xyz); - Result[12] = R6; - half R6_constant = distance(half3(-7.29, 137.14, 1.1), half3(-12.29, -4.0, -2.1)); - Result[13] = R6_constant; - - // distance({-7.29, 137.14, 1.1, -3.5}, {-12.29, -4.0, -2.1, -2.5}) = 141.2445 - half R7 = distance(X[1], Y[1]); - Result[14] = R7; - half R7_constant = distance(half4(-7.29, 137.14, 11.1, -30.5), half4(-12.29, -4.0, -2.1, -2.5)); - Result[15] = R7_constant; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X - Format: Float16 - Stride: 8 - Data: [ 0x3c80, 0x4100, 0x44c0, 0x46a0, 0xc74a, 0x5849, 0x498d, 0xcfa0 ] - # 1.125, 2.5, 4.75, 6.625, -7.29, 137.14, 11.1, -30.5 - - Name: Y - Format: Float16 - Stride: 8 - Data: [ 0x40c0, 0x4540, 0x4830, 0x454d, 0xca25, 0xc400, 0xc033, 0xc100 ] - # 2.375, 5.25, 8.375, 5.30, -12.29, -4.0, -2.1, -2.5 - - Name: Result - Format: Float16 - Stride: 2 - ZeroInitSize: 32 - - Name: ExpectedResult - Format: Float16 - Stride: 2 - Data: [ 0x3d00, 0x3d00, 0x420b, 0x420b, 0x44b8, 0x44b8, 0x44e7, 0x44e7, 0x4500, 0x4500, 0x586a, 0x586a, 0x586f, 0x586f, 0x5885, 0x5885 ] - # 1.25, 1.25, 3.02076, 3.02076, 4.71865, 4.71865, 4.90115, 4.90115, 5.0, 5.0, 141.229, 141.229, 141.844, 141.844, 144.581, 144.581 -Results: - - Result: CheckResult - Rule: BufferFloatULP - ULPT: 5 - Actual: Result - Expected: ExpectedResult -DescriptorSets: - - Resources: - - Name: X - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Result - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -... 
-#--- end - -# UNSUPPORTED: Clang-Vulkan -# Clang-Vulkan is unsupported because of two validation errors -# This issue tracks its resolution: https://github.com/llvm/offload-test-suite/issues/285 -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/distance.32.test b/test/Feature/HLSLLib/distance.32.test deleted file mode 100644 index 318ffd6e..00000000 --- a/test/Feature/HLSLLib/distance.32.test +++ /dev/null @@ -1,123 +0,0 @@ -#--- source.hlsl - -// This test tests eight different distance scenarios -// Two in 1D, 2D, 3D, and 4D - -StructuredBuffer X : register(t0); -StructuredBuffer Y : register(t1); - -RWStructuredBuffer Result : register(u2); - -[numthreads(1,1,1)] -void main() { - // distance ({1.125}, {2.375}) = 1.25 - float R0 = distance(X[0].x, Y[0].x); - Result[0] = R0; - float R0_constant = distance(1.125, 2.375); - Result[1] = R0_constant; - - // distance({1.125, 2.5}, {2.375, 5.25}) = 3.02076 - float R1 = distance(X[0].xy, Y[0].xy); - Result[2] = R1; - float R1_constant = distance(float2(1.125, 2.5), float2(2.375, 5.25)); - Result[3] = R1_constant; - - // distance({1.125, 2.5, 4.75}, {2.375, 5.25, 8.375}) = 4.71865 - float R2 = distance(X[0].xyz, Y[0].xyz); - Result[4] = R2; - float R2_constant = distance(float3(1.125, 2.5, 4.75), float3(2.375, 5.25, 8.375)); - Result[5] = R2_constant; - - // distance({1.125, 2.5, 4.75, 6.625}, {2.375, 5.25, 8.375, 5.30}) = 4.90115 - float R3 = distance(X[0], Y[0]); - Result[6] = R3; - float R3_constant = distance(float4(1.125, 2.5, 4.75, 6.625), float4(2.375, 5.25, 8.375, 5.30)); - Result[7] = R3_constant; - - // distance ({-7.29}, {-12.29}) = 5.0 - float R4 = distance(X[1].x, Y[1].x); - Result[8] = R4; - float R4_constant = distance(-7.29, -12.29); - Result[9] = R4_constant; - - // distance({-7.29, 137.14}, {-12.29, -4.0}) = 141.2303 - float R5 = distance(X[1].xy, Y[1].xy); - 
Result[10] = R5; - float R5_constant = distance(float2(-7.29, 137.14), float2(-12.29, -4.0)); - Result[11] = R5_constant; - - // distance({-7.29, 137.14, 11.1}, {-12.29, -4.0, -2.1}) = 141.745 - float R6 = distance(X[1].xyz, Y[1].xyz); - Result[12] = R6; - float R6_constant = distance(float3(-7.29, 137.14, 11.1), float3(-12.29, -4.0, -2.1)); - Result[13] = R6_constant; - - // distance({-7.29, 137.14, 11.1, -30.5}, {-12.29, -4.0, -2.1, -2.5}) = 141.2445 - float R7 = distance(X[1], Y[1]); - Result[14] = R7; - float R7_constant = distance(float4(-7.29, 137.14, 11.1, -30.5), float4(-12.29, -4.0, -2.1, -2.5)); - Result[15] = R7_constant; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X - Format: Float32 - Stride: 16 - Data: [ 1.125, 2.5, 4.75, 6.625, -7.29, 137.14, 11.1, -30.5 ] - - Name: Y - Format: Float32 - Stride: 16 - Data: [ 2.375, 5.25, 8.375, 5.30, -12.29, -4.0, -2.1, -2.5 ] - - Name: Result - Format: Float32 - Stride: 4 - ZeroInitSize: 64 - - Name: ExpectedResult - Format: Float32 - Stride: 4 - Data: [ 1.25, 1.25, 3.02076, 3.02076, 4.71865, 4.71865, 4.90115, 4.90115, 5.0, 5.0, 141.229, 141.229, 141.844, 141.844, 144.581, 144.581 ] -Results: - - Result: CheckResult - Rule: BufferFloatEpsilon - Epsilon: .0008 - Actual: Result - Expected: ExpectedResult -DescriptorSets: - - Resources: - - Name: X - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Result - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -... 
-#--- end - -# UNSUPPORTED: Clang-Vulkan -# Clang-Vulkan is unsupported because of two validation errors -# This issue tracks its resolution: https://github.com/llvm/offload-test-suite/issues/285 -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/dot.32.test b/test/Feature/HLSLLib/dot.32.test deleted file mode 100644 index f4543144..00000000 --- a/test/Feature/HLSLLib/dot.32.test +++ /dev/null @@ -1,197 +0,0 @@ -#--- source.hlsl -StructuredBuffer X0 : register(t0); -StructuredBuffer Y0 : register(t1); -StructuredBuffer X1 : register(t2); -StructuredBuffer Y1 : register(t3); -StructuredBuffer X2 : register(t4); -StructuredBuffer Y2 : register(t5); - -RWStructuredBuffer Out0 : register(u6); -RWStructuredBuffer Out1 : register(u7); -RWStructuredBuffer Out2 : register(u8); - - -[numthreads(1,1,1)] -void main() { - // float - Out0[0] = dot(X0[0].x, Y0[0].x); - Out0[1] = dot(float(1.125), float(7.29)); - - Out0[2] = dot(X0[0].xy, Y0[0].xy); - Out0[3] = dot(float2(1.125, -2.5), float2(7.29, 3.14)); - - Out0[4] = dot(X0[0].xyz, Y0[0].xyz); - Out0[5] = dot(float3(1.125, -2.5, -4.75), float3(7.29, 3.14, -1.1)); - - Out0[6] = dot(X0[0], Y0[0]); - Out0[7] = dot(float4(1.125, -2.5, -4.75, 6.625), float4(7.29, 3.14, -1.1, -3.5)); - - // int - Out1[0] = dot(X1[0].x, Y1[0].x); - Out1[1] = dot(int(100), int(25)); - - Out1[2] = dot(X1[0].xy, Y1[0].xy); - Out1[3] = dot(int2(100, -52), int2(25, 43)); - - Out1[4] = dot(X1[0].xyz, Y1[0].xyz); - Out1[5] = dot(int3(100, -52, -210), int3(25, 43, -16)); - - Out1[6] = dot(X1[0], Y1[0]); - Out1[7] = dot(int4(100, -52, -210, 75), int4(25, 43, -16, -62)); - - // uint - Out2[0] = dot(X2[0].x, Y2[0].x); - Out2[1] = dot(uint(100), uint(25)); - - Out2[2] = dot(X2[0].xy, Y2[0].xy); - Out2[3] = dot(uint2(100, 52), uint2(25, 43)); - - Out2[4] = dot(X2[0].xyz, Y2[0].xyz); - Out2[5] = dot(uint3(100, 52, 210), uint3(25, 43, 16)); - - Out2[6] = 
dot(X2[0], Y2[0]); - Out2[7] = dot(uint4(100, 52, 210, 75), uint4(25, 43, 16, 62)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X0 - Format: Float32 - Stride: 16 - Data: [ 1.125, -2.5, -4.75, 6.625 ] - - Name: Y0 - Format: Float32 - Stride: 16 - Data: [ 7.29, 3.14, -1.1, -3.5 ] - - Name: X1 - Format: Int32 - Stride: 16 - Data: [ 100, -52, -210, 75 ] - - Name: Y1 - Format: Int32 - Stride: 16 - Data: [ 25, 43, -16, -62 ] - - Name: X2 - Format: UInt32 - Stride: 16 - Data: [ 100, 52, 210, 75 ] - - Name: Y2 - Format: UInt32 - Stride: 16 - Data: [ 25, 43, 16, 62 ] - - Name: Out0 - Format: Float32 - Stride: 4 - ZeroInitSize: 32 - - Name: ExpectedOut0 - Format: Float32 - Stride: 4 - Data: [ 8.20125, 8.20125, 0.35125, 0.35125, 5.57625, 5.57625, -17.61125, -17.61125 ] - - Name: Out1 - Format: Int32 - Stride: 4 - ZeroInitSize: 32 - - Name: ExpectedOut1 - Format: Int32 - Stride: 4 - Data: [ 2500, 2500, 264, 264, 3624, 3624, -1026, -1026 ] - - Name: Out2 - Format: UInt32 - Stride: 4 - ZeroInitSize: 32 - - Name: ExpectedOut2 - Format: UInt32 - Stride: 4 - Data: [ 2500, 2500, 4736, 4736, 8096, 8096, 12746, 12746 ] -Results: - - Result: Test0 - Rule: BufferFloatEpsilon - Epsilon: 0.008 - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 - - Result: Test2 - Rule: BufferExact - Actual: Out2 - Expected: ExpectedOut2 -DescriptorSets: - - Resources: - - Name: X0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y0 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: X1 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Y1 - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: X2 - Kind: StructuredBuffer - DirectXBinding: - 
Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Y2 - Kind: StructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 6 - Space: 0 - VulkanBinding: - Binding: 6 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 7 - Space: 0 - VulkanBinding: - Binding: 7 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 8 - Space: 0 - VulkanBinding: - Binding: 8 -#--- end - -# https://github.com/llvm/llvm-project/issues/149561 -# XFAIL: Clang-Vulkan && !VK_KHR_shader_float_controls2 - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/dot.fp16.test b/test/Feature/HLSLLib/dot.fp16.test deleted file mode 100644 index aa21c6f2..00000000 --- a/test/Feature/HLSLLib/dot.fp16.test +++ /dev/null @@ -1,90 +0,0 @@ -#--- source.hlsl -StructuredBuffer X : register(t0); -StructuredBuffer Y : register(t1); - -RWStructuredBuffer Out : register(u2); - - -[numthreads(1,1,1)] -void main() { - Out[0] = dot(X[0].x, Y[0].x); - Out[1] = dot(half(1.125), half(7.29)); - - Out[2] = dot(X[0].xy, Y[0].xy); - Out[3] = dot(half2(1.125, -2.5), half2(7.29, 3.14)); - - Out[4] = dot(X[0].xyz, Y[0].xyz); - Out[5] = dot(half3(1.125, -2.5, -4.75), half3(7.29, 3.14, -1.1)); - - Out[6] = dot(X[0], Y[0]); - Out[7] = dot(half4(1.125, -2.5, -4.75, 6.625), half4(7.29, 3.14, -1.1, -3.5)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X - Format: Float16 - Stride: 8 - Data: [ 0x3c80, 0xc100, 0xc4c0, 0x46a0 ] - # [ 1.125, -2.5, -4.75, 6.625 ] - - Name: Y - Format: Float16 - Stride: 8 - Data: [ 0x474a, 0x4247, 0xbc66, 0xc300 ] - # [ 7.29, 3.14, -1.1, -3.5 ] - - Name: Out - Format: Float16 - Stride: 2 - ZeroInitSize: 16 - - Name: ExpectedOut - Format: Float16 - Stride: 2 - Data: [ 0x481a, 0x481a, 
0x359f, 0x359f, 0x4594, 0x4594, 0xcc67, 0xcc67 ] - # [ 8.20125, 8.20125, 0.35125, 0.35125, 5.57625, 5.57625, -17.61125, -17.61125 ] -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 30 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: X - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -#--- end - -# DXC is producing a compile-time value at a significantly higher precision than -# runtime computed values for case 3 (clang will likely do the same once it -# learns to constant evaluate `dot`). - -# https://github.com/llvm/llvm-project/issues/149561 -# XFAIL: Clang-Vulkan && !VK_KHR_shader_float_controls2 - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -HV 202x -Gis -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/dot.fp64.test b/test/Feature/HLSLLib/dot.fp64.test deleted file mode 100644 index f55060a5..00000000 --- a/test/Feature/HLSLLib/dot.fp64.test +++ /dev/null @@ -1,83 +0,0 @@ -#--- source.hlsl -StructuredBuffer X : register(t0); -StructuredBuffer Y : register(t1); - -RWStructuredBuffer Out : register(u2); - - -[numthreads(1,1,1)] -void main() { - Out[0] = dot(X[0], Y[0]); - Out[1] = dot(double(1.125), double(7.29)); - - Out[2] = dot(X[1], Y[1]); - Out[3] = dot(double(-2.5), double(3.14)); - - Out[4] = dot(X[2], Y[2]); - Out[5] = dot(double(-4.75), double(-1.1)); - - Out[6] = dot(X[3], Y[3]); - Out[7] = dot(double(6.625), double(-3.5)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X - Format: Float64 - Stride: 8 - Data: [ 1.125, -2.5, -4.75, 6.625 ] - - Name: Y - Format: Float64 - 
Stride: 8 - Data: [ 7.29, 3.14, -1.1, -3.5 ] - - Name: Out - Format: Float64 - Stride: 8 - ZeroInitSize: 64 - - Name: ExpectedOut - Format: Float64 - Stride: 8 - Data: [ 8.20125, 8.20125, -7.85, -7.85, 5.225, 5.225, -23.1875, -23.1875 ] -Results: - - Result: Test0 - Rule: BufferFloatEpsilon - Epsilon: 0.008 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: X - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -#--- end - -# https://github.com/llvm/llvm-project/issues/149561 -# XFAIL: Clang-Vulkan && !VK_KHR_shader_float_controls2 - -# REQUIRES: Double -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/dot.int16.test b/test/Feature/HLSLLib/dot.int16.test deleted file mode 100644 index 0945698d..00000000 --- a/test/Feature/HLSLLib/dot.int16.test +++ /dev/null @@ -1,137 +0,0 @@ -#--- source.hlsl -StructuredBuffer X0 : register(t0); -StructuredBuffer Y0 : register(t1); -StructuredBuffer X1 : register(t2); -StructuredBuffer Y1 : register(t3); - -RWStructuredBuffer Out0 : register(u4); -RWStructuredBuffer Out1 : register(u5); - - -[numthreads(1,1,1)] -void main() { - // int16_t - Out0[0] = dot(X0[0].x, Y0[0].x); - Out0[1] = dot(int16_t(100), int16_t(25)); - - Out0[2] = dot(X0[0].xy, Y0[0].xy); - Out0[3] = dot(int16_t2(100, -52), int16_t2(25, 43)); - - Out0[4] = dot(X0[0].xyz, Y0[0].xyz); - Out0[5] = dot(int16_t3(100, -52, -210), int16_t3(25, 43, -16)); - - Out0[6] = dot(X0[0], Y0[0]); - Out0[7] = dot(int16_t4(100, -52, -210, 75), int16_t4(25, 43, -16, -62)); - - // uint16_t - Out1[0] = dot(X1[0].x, Y1[0].x); - Out1[1] = dot(uint16_t(100), uint16_t(25)); - - Out1[2] = 
dot(X1[0].xy, Y1[0].xy); - Out1[3] = dot(uint16_t2(100, 52), uint16_t2(25, 43)); - - Out1[4] = dot(X1[0].xyz, Y1[0].xyz); - Out1[5] = dot(uint16_t3(100, 52, 210), uint16_t3(25, 43, 16)); - - Out1[6] = dot(X1[0], Y1[0]); - Out1[7] = dot(uint16_t4(100, 52, 210, 75), uint16_t4(25, 43, 16, 62)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X0 - Format: Int16 - Stride: 8 - Data: [ 100, -52, -210, 75 ] - - Name: Y0 - Format: Int16 - Stride: 8 - Data: [ 25, 43, -16, -62 ] - - Name: X1 - Format: UInt16 - Stride: 8 - Data: [ 100, 52, 210, 75 ] - - Name: Y1 - Format: UInt16 - Stride: 8 - Data: [ 25, 43, 16, 62 ] - - Name: Out0 - Format: Int16 - Stride: 2 - ZeroInitSize: 16 - - Name: ExpectedOut0 - Format: Int16 - Stride: 2 - Data: [ 2500, 2500, 264, 264, 3624, 3624, -1026, -1026 ] - - Name: Out1 - Format: UInt16 - Stride: 2 - ZeroInitSize: 16 - - Name: ExpectedOut1 - Format: UInt16 - Stride: 2 - Data: [ 2500, 2500, 4736, 4736, 8096, 8096, 12746, 12746 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 -DescriptorSets: - - Resources: - - Name: X0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y0 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: X1 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Y1 - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 -#--- end - -# REQUIRES: Int16 -# RUN: split-file %s %t -# RUN: %dxc_target 
-enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/dot.int64.test b/test/Feature/HLSLLib/dot.int64.test deleted file mode 100644 index 1dbd1263..00000000 --- a/test/Feature/HLSLLib/dot.int64.test +++ /dev/null @@ -1,137 +0,0 @@ -#--- source.hlsl -StructuredBuffer X0 : register(t0); -StructuredBuffer Y0 : register(t1); -StructuredBuffer X1 : register(t2); -StructuredBuffer Y1 : register(t3); - -RWStructuredBuffer Out0 : register(u4); -RWStructuredBuffer Out1 : register(u5); - - -[numthreads(1,1,1)] -void main() { - // int64_t - Out0[0] = dot(X0[0].x, Y0[0].x); - Out0[1] = dot(int64_t(100), int64_t(25)); - - Out0[2] = dot(X0[0].xy, Y0[0].xy); - Out0[3] = dot(int64_t2(100, -52), int64_t2(25, 43)); - - Out0[4] = dot(X0[0].xyz, Y0[0].xyz); - Out0[5] = dot(int64_t3(100, -52, -210), int64_t3(25, 43, -16)); - - Out0[6] = dot(X0[0], Y0[0]); - Out0[7] = dot(int64_t4(100, -52, -210, 75), int64_t4(25, 43, -16, -62)); - - // uint64_t - Out1[0] = dot(X1[0].x, Y1[0].x); - Out1[1] = dot(uint64_t(100), uint64_t(25)); - - Out1[2] = dot(X1[0].xy, Y1[0].xy); - Out1[3] = dot(uint64_t2(100, 52), uint64_t2(25, 43)); - - Out1[4] = dot(X1[0].xyz, Y1[0].xyz); - Out1[5] = dot(uint64_t3(100, 52, 210), uint64_t3(25, 43, 16)); - - Out1[6] = dot(X1[0], Y1[0]); - Out1[7] = dot(uint64_t4(100, 52, 210, 75), uint64_t4(25, 43, 16, 62)); - } -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X0 - Format: Int64 - Stride: 32 - Data: [ 100, -52, -210, 75 ] - - Name: Y0 - Format: Int64 - Stride: 32 - Data: [ 25, 43, -16, -62 ] - - Name: X1 - Format: UInt64 - Stride: 32 - Data: [ 100, 52, 210, 75 ] - - Name: Y1 - Format: UInt64 - Stride: 32 - Data: [ 25, 43, 16, 62 ] - - Name: Out0 - Format: Int64 - Stride: 8 - ZeroInitSize: 64 - - Name: ExpectedOut0 - Format: Int64 - Stride: 8 - Data: [ 2500, 2500, 264, 264, 3624, 3624, -1026, -1026 ] - - Name: Out1 - Format: 
UInt64 - Stride: 8 - ZeroInitSize: 64 - - Name: ExpectedOut1 - Format: UInt64 - Stride: 8 - Data: [ 2500, 2500, 4736, 4736, 8096, 8096, 12746, 12746 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 -DescriptorSets: - - Resources: - - Name: X0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y0 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: X1 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Y1 - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 -#--- end - -# REQUIRES: Int64 -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/dot2add.test b/test/Feature/HLSLLib/dot2add.test deleted file mode 100644 index fec2ff8a..00000000 --- a/test/Feature/HLSLLib/dot2add.test +++ /dev/null @@ -1,101 +0,0 @@ -#--- source.hlsl -StructuredBuffer A : register(t0); -StructuredBuffer B : register(t1); -StructuredBuffer Acc : register(t2); - -RWStructuredBuffer Out : register(u3); - - -[numthreads(12,1,1)] -void main(uint3 DTID : SV_DispatchThreadID) { - Out[DTID.x] = dot2add(A[DTID.x], B[DTID.x], Acc[DTID.x]); - - if (DTID.x == 0) { - // The constant cases are independent of thread id - Out[12] = dot2add(half2(1, 2), half2(3, 4), float(0)); - Out[13] = dot2add(half2(-1, 2), half2(3, -4), float(10)); - Out[14] = dot2add(half2(65504, 1), half2(1, 65504), float(0)); - Out[15] = dot2add(half2(1, -65504), half2(-65504, 1), 
float(-10000000)); - } -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: A - Format: Float16 - Stride: 4 - Data: [ 0x3c00, 0x4000, 0x3c00, 0xc000, 0x3c00, 0x4000, 0xbc00, 0x4000, 0x3c00, 0x4000, 0xbc00, 0x4000, 0x3c00, 0x4000, 0xbc00, 0xc000, 0x7bff, 0x3c00, 0xfbff, 0x3c00, 0x3c00, 0x7bff, 0x3c00, 0xfbff ] - # 1, 2, 1, -2, 1, 2, -1, 2, 1, 2, -1, 2, 1, 2, -1, -2, 65504, 1, -65504, 1, 1, 65504, 1, -65504 - - Name: B - Format: Float16 - Stride: 4 - Data: [ 0x4200, 0x4400, 0xc200, 0x4400, 0x4200, 0x4400, 0x4200, 0xc400, 0x4200, 0x4400, 0xc200, 0x4400, 0x4200, 0x4400, 0xc200, 0xc400, 0x3c00, 0x7bff, 0x3c00, 0xfbff, 0x7bff, 0x3c00, 0xfbff, 0x3c00 ] - # 3, 4, -3, 4, 3, 4, 3, -4, 3, 4, -3, 4, 3, 4, -3, -4, 1, 65504, 1, -65504, 65504, 1, -65504, 1 - - Name: Acc - Format: Float32 - Stride: 4 - Data: [ 0, 0, 10, 10, -5, -5, -30, -30, 0, 0, 10000000, -10000000 ] - - Name: Out - Format: Float32 - Stride: 4 - ZeroInitSize: 64 - - Name: ExpectedOut - Format: Float32 - Stride: 4 - Data: [ 11, -11, 21, -1, 6, 6, -19, -19, 131008, -131008, 10131008, -10131008, 11, -1, 131008, -10131008 ] -Results: - - Result: Test0 - Rule: BufferFloatEpsilon - Epsilon: 0.008 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: A - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: B - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Acc - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -#--- end - -# https://github.com/llvm/offload-test-suite/issues/341 -# XFAIL: Metal - -# https://github.com/llvm/llvm-project/issues/149561 -# XFAIL: Clang-Vulkan - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7695 -# XFAIL: DXC-Vulkan - 
-# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -Gis -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/dot4add.test b/test/Feature/HLSLLib/dot4add.test deleted file mode 100644 index ad332841..00000000 --- a/test/Feature/HLSLLib/dot4add.test +++ /dev/null @@ -1,82 +0,0 @@ -#--- source.hlsl - -StructuredBuffer X : register(t0); -StructuredBuffer Y : register(t1); -RWStructuredBuffer Result : register(u2); - -[numthreads(1,1,1)] -void main() { - // dot4add({1, 1, 1, 1}, {1, 2, -128, -86}, 0) = -211 = 0xFF2D - uint32_t R0 = dot4add_i8packed(X[0], Y[0], 0u); - Result[0] = R0; - // dot4add({2, 4, 8, -1}, {2, 2, 2, 1}, -211) = -184 = 0xFF48 - Result[1] = dot4add_i8packed(X[1], Y[1], R0); - - // dot4add({1, 1, 1, 1}, {1, 2, 128, 170}, 0) = 301 = 0x012D - uint32_t R1 = dot4add_u8packed(X[0], Y[0], 0u); - Result[2] = R1; - // dot4add({2, 4, 8, 255}, {2, 2, 2, 1}, 301) = 584 = 0x0248 - Result[3] = dot4add_u8packed(X[1], Y[1], R1); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X - Format: Hex32 - Stride: 4 - Data: [ 0x01010101, 0x020408FF ] - - Name: Y - Format: Hex32 - Stride: 4 - Data: [ 0x010280AA, 0x02020201 ] - - Name: Result - Format: Hex32 - Stride: 4 - ZeroInitSize: 16 - - Name: ExpectedResult - Format: Hex32 - Stride: 4 - Data: [ 0xFFFFFF2D, 0xFFFFFF48, 0x0000012D, 0x00000248 ] -Results: - - Result: CheckResult - Rule: BufferExact - Actual: Result - Expected: ExpectedResult -DescriptorSets: - - Resources: - - Name: X - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Result - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -... 
-#--- end - -# We don't yet support PackedVectorFormat4x8Bit -# UNSUPPORTED: Clang-Vulkan - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/exp.16.test b/test/Feature/HLSLLib/exp.16.test deleted file mode 100644 index fc8f9593..00000000 --- a/test/Feature/HLSLLib/exp.16.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = exp(In[0]); - half4 Tmp = {exp(In[1].xyz), exp(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {exp(In[2].xy), exp(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x0001, 0x7c00, 0xbc00, 0x4000, 0x7e00, 0x7e00, 0x7e00,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, -1, 2, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0x0000, 0x3c00, 0x3c00, 0x3c00, 0x3c00, 0x7c00, 0x35e3, 0x4764, 0x7e00, 0x7e00, 0x7e00,] - # NaN, 0, 1, 1, 1, 1, Inf, 0.367879441, 7.38905609893, -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 2 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/exp.32.test b/test/Feature/HLSLLib/exp.32.test deleted file mode 100644 index a5e27b49..00000000 --- a/test/Feature/HLSLLib/exp.32.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = exp(In[0]); - float4 Tmp = {exp(In[1].xyz), exp(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {exp(In[2].xy), exp(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, inf, -1, 10, nan, nan, nan,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, -1, 10, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, 0, 1, 1, 1, 1, inf, 0.367879441, 22026.46579, nan, nan, nan,] - # NaN, 0, 1, 1, 1, 1, Inf, 0.367879441, 22026.46579, -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 2 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/exp2.16.test b/test/Feature/HLSLLib/exp2.16.test deleted file mode 100644 index 8abbe5f2..00000000 --- a/test/Feature/HLSLLib/exp2.16.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = exp2(In[0]); - half4 Tmp = {exp2(In[1].xyz), exp2(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {exp2(In[2].xy), exp2(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x0001, 0x7c00, 0x3c00, 0xbc00, 0x4000, 0x4200, 0x3800,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1, -1, 2, 3, 1/2, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0, 0x3c00, 0x3c00, 0x3c00, 0x3c00, 0x7c00, 0x4000, 0x3800, 0x4400, 0x4800, 0x3da8,] - # NaN, 0, 1, 1, 1, 1, inf, 2, 0.5, 4, 8, 1.4140625, -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 2 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/exp2.32.test b/test/Feature/HLSLLib/exp2.32.test deleted file mode 100644 index bf2cfee4..00000000 --- a/test/Feature/HLSLLib/exp2.32.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = exp2(In[0]); - float4 Tmp = {exp2(In[1].xyz), exp2(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {exp2(In[2].xy), exp2(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, inf, 1, -1, 2, 3, 0.5,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1, -1, 2, 3, 1/2, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, 0, 1, 1, 1, 1, inf, 2, 0.5, 4, 8, 1.4142135,] - # NaN, 0, 1, 1, 1, 1, inf, 2, 0.5, 4, 8, 1.4142135, -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 2 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/firstbithigh.16.test b/test/Feature/HLSLLib/firstbithigh.16.test deleted file mode 100644 index e01d2e03..00000000 --- a/test/Feature/HLSLLib/firstbithigh.16.test +++ /dev/null @@ -1,99 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -StructuredBuffer In2 : register(t1); -RWStructuredBuffer Out : register(u2); - -[numthreads(1,1,1)] -void main() { - Out[0] = firstbithigh(In1[0]); - uint32_t4 Out1 = {firstbithigh(In1[0].xyz), firstbithigh(In1[0].w)}; - uint32_t4 Out2 = {firstbithigh(In2[0].xy), firstbithigh(In2[0].zw)}; - Out[1]= Out1; - Out[2] = Out2; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Hex16 - Stride: 8 - Data: [ - 0x0000, - 0x0009, - 0x0001, - 0xFFFF, - ] - - Name: In2 - Format: Int16 - Stride: 8 - # For signed, negative values the index of the first 0 from MSB is returned instead. - Data: [ - -1, # All 1s -> return not found terminal - -8, - 9, - 0 - ] - - Name: Out - Format: UInt32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: UInt32 - Stride: 16 - # All bits set (4294967295) is returned when no bit is set on the input - Data: [4294967295, 3, 0, 15, 4294967295, 3, 0, 15, 4294967295, 2, 3, 4294967295] -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -... 
-#--- end - -# REQUIRES: Int16 - -# No bit set terminal is returned as 4294901776 instead of 4294967295 -# XFAIL: DXC-Metal -# Fails with 'gpu-exec: error: Failed to materializeAll.:' -# XFAIL: Clang-Metal - -# https://github.com/llvm/llvm-project/issues/145752 -# XFAIL: Clang-DirectX - -# 16/64 bit firstbithigh doesn't have a DXC-Vulkan lowering -# https://github.com/microsoft/DirectXShaderCompiler/blob/48d6e3c635f0ab3ae79580c37003e6faeca6c671/tools/clang/test/CodeGenSPIRV/intrinsics.firstbitlow.64bit.hlsl#L5 -# UNSUPPORTED: DXC-Vulkan - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -enable-16bit-types -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/firstbithigh.32.test b/test/Feature/HLSLLib/firstbithigh.32.test deleted file mode 100644 index 3b738118..00000000 --- a/test/Feature/HLSLLib/firstbithigh.32.test +++ /dev/null @@ -1,88 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -StructuredBuffer In2 : register(t1); -RWStructuredBuffer Out : register(u2); - -[numthreads(1,1,1)] -void main() { - Out[0] = firstbithigh(In1[0]); - uint32_t4 Out1 = {firstbithigh(In1[0].xyz), firstbithigh(In1[0].w)}; - uint32_t4 Out2 = {firstbithigh(In2[0].xy), firstbithigh(In2[0].zw)}; - Out[1]= Out1; - Out[2] = Out2; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Hex32 - Stride: 16 - Data: [ - 0x00000000, - 0x00000009, - 0x00000001, - 0xFFFFFFFF, - ] - - Name: In2 - Format: Int32 - Stride: 16 - # For signed, negative values the index of the first 0 from MSB is returned instead. 
- Data: [ - -1, # All 1s -> return not found terminal - -8, - 9, - 0 - ] - - Name: Out - Format: UInt32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: UInt32 - Stride: 16 - # All bits set (4294967295) is returned when no bit is set on the input - Data: [4294967295, 3, 0, 31, 4294967295, 3, 0, 31, 4294967295, 2, 3, 4294967295] -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -... -#--- end - -# https://github.com/llvm/llvm-project/issues/145752 -# XFAIL: Clang-DirectX || Clang-Metal - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/firstbithigh.64.test b/test/Feature/HLSLLib/firstbithigh.64.test deleted file mode 100644 index 075259a8..00000000 --- a/test/Feature/HLSLLib/firstbithigh.64.test +++ /dev/null @@ -1,105 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -StructuredBuffer In2 : register(t1); -RWStructuredBuffer Out : register(u2); - -[numthreads(1,1,1)] -void main() { - Out[0] = firstbithigh(In1[0]); - uint32_t4 Out1 = {firstbithigh(In1[0].xyz), firstbithigh(In1[0].w)}; - uint32_t4 Out2 = {firstbithigh(In2[0].xy), firstbithigh(In2[0].zw)}; - Out[1]= Out1; - Out[2] = Out2; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Hex64 - Stride: 32 - Data: [ - 0x0000000000000000, - 0x0000000000000009, - 0x0000000000000001, - 0xFFFFFFFFFFFFFFFF, - ] - - Name: In2 - Format: Int64 - Stride: 32 - # For signed, negative values 
the index of the first 0 from MSB is returned instead. - Data: [ - -1, # All 1s -> return not found terminal - -8, - 9, - 0 - ] - - Name: Out - Format: UInt32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: UInt32 - Stride: 16 - # All bits set (4294967295) is returned when no bit is set on the input - Data: [4294967295, 3, 0, 63, 4294967295, 3, 0, 63, 4294967295, 2, 3, 4294967295] -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -... -#--- end - -# REQUIRES: Int64 - -# Fails with 'gpu-exec: error: Failed to materializeAll.:' -# XFAIL: Metal - -# 16/64 bit firstbithigh doesn't have a DXC-Vulkan lowering -# https://github.com/microsoft/DirectXShaderCompiler/blob/48d6e3c635f0ab3ae79580c37003e6faeca6c671/tools/clang/test/CodeGenSPIRV/intrinsics.firstbitlow.64bit.hlsl#L5 -# UNSUPPORTED: DXC-Vulkan - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7502 -# WARP firstbithigh(s64 -1) should return -1 but returns 32 on older versions of -# Warp. This was fixed for x86 in 1.0.14, but not for arm64. 
-# XFAIL: DirectX-WARP - -# https://github.com/llvm/llvm-project/issues/143171 -# XFAIL: Clang-Vulkan - -# https://github.com/llvm/llvm-project/issues/145752 -# XFAIL: Clang-DirectX - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/firstbitlow.16.test b/test/Feature/HLSLLib/firstbitlow.16.test deleted file mode 100644 index 2c5f2dd0..00000000 --- a/test/Feature/HLSLLib/firstbitlow.16.test +++ /dev/null @@ -1,88 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -StructuredBuffer In2 : register(t1); -RWStructuredBuffer Out : register(u2); - -[numthreads(1,1,1)] -void main() { - Out[0] = firstbitlow(In1[0]); - uint32_t4 Out1 = {firstbitlow(In1[0].xyz), firstbitlow(In1[0].w)}; - uint32_t4 Out2 = {firstbitlow(In2[0].xy), firstbitlow(In2[0].zw)}; - Out[1] = Out1; - Out[2] = Out2; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Hex16 - Stride: 8 - Data: [ - 0x0000, - 0x00E8, - 0x8000, - 0xFFFF, - ] - - Name: In2 - Format: Int16 - Stride: 8 - Data: [-1, -8, 8, 0] - - Name: Out - Format: UInt32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: UInt32 - Stride: 16 - # All bits set (4294967295) is returned when no bit is set on the input - Data: [4294967295, 3, 15, 0, 4294967295, 3, 15, 0, 0, 3, 3, 4294967295] -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -... 
-#--- end - -# REQUIRES: Int16 - -# Fails with 'gpu-exec: error: Failed to materializeAll.:' -# XFAIL: Metal - -# 16/64 bit firstbitlow doesn't have a DXC-Vulkan lowering -# https://github.com/microsoft/DirectXShaderCompiler/blob/48d6e3c635f0ab3ae79580c37003e6faeca6c671/tools/clang/test/CodeGenSPIRV/intrinsics.firstbitlow.64bit.hlsl#L5 -# UNSUPPORTED: DXC-Vulkan - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -enable-16bit-types -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/firstbitlow.32.test b/test/Feature/HLSLLib/firstbitlow.32.test deleted file mode 100644 index 9942dae4..00000000 --- a/test/Feature/HLSLLib/firstbitlow.32.test +++ /dev/null @@ -1,79 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -StructuredBuffer In2 : register(t1); -RWStructuredBuffer Out : register(u2); - -[numthreads(1,1,1)] -void main() { - Out[0] = firstbitlow(In1[0]); - uint4 Out1 = {firstbitlow(In1[0].xyz), firstbitlow(In1[0].w)}; - uint4 Out2 = {firstbitlow(In2[0].xy), firstbitlow(In2[0].zw)}; - Out[1] = Out1; - Out[2] = Out2; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Hex32 - Stride: 16 - Data: [ - 0x00000000, - 0x000000E8, - 0x80000000, - 0xFFFFFFFF, - ] - - Name: In2 - Format: Int32 - Stride: 16 - Data: [-1, -8, 8, 0] - - Name: Out - Format: UInt32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: UInt32 - Stride: 16 - # All bits set (4294967295) is returned when no bit is set on the input - Data: [4294967295, 3, 31, 0, 4294967295, 3, 31, 0, 0, 3, 3, 4294967295] -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - 
VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -... -#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/firstbitlow.64.test b/test/Feature/HLSLLib/firstbitlow.64.test deleted file mode 100644 index 9044ce77..00000000 --- a/test/Feature/HLSLLib/firstbitlow.64.test +++ /dev/null @@ -1,91 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -StructuredBuffer In2 : register(t1); -RWStructuredBuffer Out : register(u2); - -[numthreads(1,1,1)] -void main() { - Out[0] = firstbitlow(In1[0]); - uint32_t4 Out1 = {firstbitlow(In1[0].xyz), firstbitlow(In1[0].w)}; - uint32_t4 Out2 = {firstbitlow(In2[0].xy), firstbitlow(In2[0].zw)}; - Out[1] = Out1; - Out[2] = Out2; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Hex64 - Stride: 32 - Data: [ - 0x0000000000000000, - 0x00000000000000E8, - 0x8000000000000000, - 0xFFFFFFFFFFFFFFFF, - ] - - Name: In2 - Format: Int64 - Stride: 32 - Data: [-1, -8, 8, 0] - - Name: Out - Format: UInt32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: UInt32 - Stride: 16 - # All bits set (4294967295) is returned when no bit is set on the input - Data: [4294967295, 3, 63, 0, 4294967295, 3, 63, 0, 0, 3, 3, 4294967295] -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -... 
-#--- end - -# REQUIRES: Int64 - -# Fails with 'gpu-exec: error: Failed to materializeAll.:' -# XFAIL: Metal - -# 16/64 bit firstbitlow doesn't have a DXC-Vulkan lowering -# https://github.com/microsoft/DirectXShaderCompiler/blob/48d6e3c635f0ab3ae79580c37003e6faeca6c671/tools/clang/test/CodeGenSPIRV/intrinsics.firstbitlow.64bit.hlsl#L5 -# UNSUPPORTED: DXC-Vulkan - -# https://github.com/llvm/llvm-project/issues/143003 -# XFAIL: Clang-Vulkan - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/floor.16.test b/test/Feature/HLSLLib/floor.16.test deleted file mode 100644 index 60bd39bd..00000000 --- a/test/Feature/HLSLLib/floor.16.test +++ /dev/null @@ -1,68 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = floor(In[0]); - half4 Tmp = {floor(In[1].xyz), floor(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {floor(In[2].xy), floor(In[2].zw)}; - Out[2] = Tmp2; - Out[3] = floor(In[3]); -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x0001, 0x7c00, 0x4900, 0x4933, 0x4940, 0x494d, 0xc900, 0xc933, 0xc940, 0xc94d, 0x7e00,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 10.0, 10.4, 10.5, 10.6, -10.0, -10.4, -10.5, -10.6, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0xbc00, 0x8000, 0x0000, 0x0000, 0x7c00, 0x4900, 0x4900, 0x4900, 0x4900, 0xc900, 0xc980, 0xc980, 0xc980, 0x7e00,] - # NaN, -Inf, -1, -0, 0, 0, Inf, 10.0, 10.0, 10.0, 10.0, -10.0, -11.0, -11.0, -11.0, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In 
- Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/floor.32.test b/test/Feature/HLSLLib/floor.32.test deleted file mode 100644 index 55ef9eb3..00000000 --- a/test/Feature/HLSLLib/floor.32.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = floor(In[0]); - float4 Tmp = {floor(In[1].xyz), floor(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {floor(In[2].xy), floor(In[2].zw)}; - Out[2] = Tmp2; - Out[3] = floor(In[3]); -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, 0, -0, 0, 0x1.e7d42cp-127, inf, 10.0, 10.4, 10.5, 10.6, -10.0, -10.4, -10.5, -10.6, nan,] - # NaN, -Inf, 0, -0, 0, denorm, Inf, 10.0, 10.4, 10.5, 10.6, -10.0, -10.4, -10.5, -10.6, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, -inf, 0, -0, 0, 0, inf, 10.0, 10.0, 10.0, 10.0, -10.0, -11.0, -11.0, -11.0, nan,] - # NaN, -Inf, 0, -0, 0, 0, Inf, 10.0, 10.0, 10.0, 10.0, -10.0, -11.0, -11.0, -11.0, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/fmod.16.test b/test/Feature/HLSLLib/fmod.16.test deleted file mode 100644 index 6b5b5f8d..00000000 --- a/test/Feature/HLSLLib/fmod.16.test +++ /dev/null @@ -1,78 +0,0 @@ -#--- source.hlsl -StructuredBuffer In0 : register(t0); -StructuredBuffer In1 : register(t1); - -RWStructuredBuffer Out0 : register(u2); - -[numthreads(1,1,1)] -void main() { - Out0[0] = fmod(In0[0], In1[0]); - Out0[1] = half4(fmod(In0[1].xyz, In1[1].xyz), fmod(In0[1].w, In1[1].w)); - Out0[2] = half4(fmod(In0[2].xy, In1[2].xy), fmod(In0[2].zw, In1[2].zw)); - Out0[3] = fmod(half4(10.5, -99.5, 5, 0.25), half4(1, -3, 0.25, 5)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In0 - Format: Float16 - Stride: 8 - Data: [0x4940, 0xd638, 0x4500, 0x3400, 0, 0x6056, 0x4940, 0xd638, 0x4500, 0x3400, 0, 0x6056] - # 10.5, -99.5, 5, 0.25, 0, 555, 10.5, -99.5, 5, 0.25, 0, 555 - - Name: In1 - Format: Float16 - Stride: 8 - Data: [0x3c00, 0xc200, 0x3400, 0x4500, 0x4900, 0x4100, 0x3c00, 0xc200, 0x3400, 0x4500, 0x4900, 0x4100] - # 1, -3, 0.25, 5, 10, 2.5, 1, -3, 0.25, 5, 10, 2.5 - - Name: Out0 - Format: Float16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut0 - Format: Float16 - Stride: 8 - Data: [ 0x3800, 0xB800, 0x0, 0x3400, 0x0, 0x0, 0x3800, 0xB800, 0x0, 0x3400, 0x0, 0x0, 0x3800, 0xB800, 0x0, 0x3400 ] - # 0.5, -0.5, 0, 0.25, 0, 0, 0.5 -0.5, 0, 0.25, 0, 0, 0.5, -0.5, 0, 0.25 -Results: - - Result: Test0 - Rule: BufferFloatEpsilon - Epsilon: 0.04 - Actual: Out0 - Expected: ExpectedOut0 -DescriptorSets: - - Resources: - - Name: In0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out0 - Kind: RWStructuredBuffer - 
DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -#--- end - -# https://github.com/llvm/llvm-project/issues/149561 -# XFAIL: Clang-Vulkan && !VK_KHR_shader_float_controls2 - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -Gis -HV 202x -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/fmod.32.test b/test/Feature/HLSLLib/fmod.32.test deleted file mode 100644 index d64d8898..00000000 --- a/test/Feature/HLSLLib/fmod.32.test +++ /dev/null @@ -1,74 +0,0 @@ -#--- source.hlsl -StructuredBuffer In0 : register(t0); -StructuredBuffer In1 : register(t1); - -RWStructuredBuffer Out0 : register(u2); - -[numthreads(1,1,1)] -void main() { - Out0[0] = fmod(In0[0], In1[0]); - Out0[1] = float4(fmod(In0[1].xyz, In1[1].xyz), fmod(In0[1].w, In1[1].w)); - Out0[2] = float4(fmod(In0[2].xy, In1[2].xy), fmod(In0[2].zw, In1[2].zw)); - Out0[3] = fmod(float4(10.10, -99.99, 5, 0.25), float4(1.1, -3, 0.25, 5)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In0 - Format: Float32 - Stride: 16 - Data: [10.10, -99.99, 5, 0.25, 0, 6555.555, 10.10, -99.99, 5, 0.25, 0, 6555.555] - - Name: In1 - Format: Float32 - Stride: 16 - Data: [1.1, -3, 0.25, 5, 10, 2.22, 1.1, -3, 0.25, 5, 10, 2.22] - - Name: Out0 - Format: Float32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut0 - Format: Float32 - Stride: 16 - Data: [ 0.2, -0.99, 0, 0.25, 0, 2.1154, 0.2, -0.99, 0, 0.25, 0, 2.1154, 0.2, -0.99, 0, 0.25 ] -Results: - - Result: Test0 - Rule: BufferFloatEpsilon - Epsilon: 0.0008 - Actual: Out0 - Expected: ExpectedOut0 -DescriptorSets: - - Resources: - - Name: In0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out0 - Kind: RWStructuredBuffer - 
DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -#--- end - -# https://github.com/llvm/llvm-project/issues/149561 -# XFAIL: Clang-Vulkan && !VK_KHR_shader_float_controls2 - -# RUN: split-file %s %t -# RUN: %dxc_target -Gis -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/frac.16.test b/test/Feature/HLSLLib/frac.16.test deleted file mode 100644 index 2989b465..00000000 --- a/test/Feature/HLSLLib/frac.16.test +++ /dev/null @@ -1,70 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = frac(In[0]); - half4 Tmp = {frac(In[1].xyz), frac(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {frac(In[2].xy), frac(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x03FF, 0x8000, 0x0000, 0x7c00, 0xbc00, 0x4170, 0x63d1, 0xC764, 0x7e00, 0x7e00,] - # NaN, -Inf, 0.00006097555, -0, 0, Inf, -1, 2.719, 1000.5, -7.390625, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0x7e00, 0x03FF, 0x0000, 0x0000, 0x7e00, 0x0000, 0x39c1, 0x3800, 0x38E1, 0x7e00, 0x7e00,] - # NaN, NaN, 0.00006097555, 0, 0, NaN, 0, 0.719, 0.5, 0.6098633, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0.0008 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# https://github.com/KhronosGroup/SPIRV-Cross/issues/2525 -# XFAIL: Vulkan-Darwin - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/frac.32.test b/test/Feature/HLSLLib/frac.32.test deleted file mode 100644 index d045e6e1..00000000 --- a/test/Feature/HLSLLib/frac.32.test +++ /dev/null @@ -1,66 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = frac(In[0]); - float4 Tmp = {frac(In[1].xyz), frac(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {frac(In[2].xy), frac(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, 0, -0, 0, 1.5, inf, -1, 2.718280, 1000.599976, -7.389, nan,] - # NaN, -Inf, 0, -0, 0, 1.5, Inf, -1, 2.718280, 1000.599976, -7.389, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, nan, 0, 0, 0, .5, nan, 0, 0.718280, 0.599976, 0.611, nan,] - # NaN, NaN, 0, 0, 0, .5, NaN, 0, 0.718280, 0.599976, 0.611, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0.0008 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/isinf.16.test b/test/Feature/HLSLLib/isinf.16.test deleted file mode 100644 index 3458a80e..00000000 --- a/test/Feature/HLSLLib/isinf.16.test +++ /dev/null @@ -1,71 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = isinf(In[0]); - bool4 Tmp = {isinf(In[0].xyz), isinf(In[0].w)}; - Out[1] = Tmp; - Out[2].xy = isinf(In[0].xy); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [0x7c00, 0xfc00, 0x3c00, 0x7e00] # Inf, -Inf, 1, Nan - - Name: Out - Format: Bool - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Bool - Stride: 16 - Data: [1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0] -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - -# llvm/llvm-project#141089 -# XFAIL: Clang-Vulkan - -# https://github.com/llvm/llvm-project/issues/145571 -# XFAIL: Clang && DirectX-NV - -# A bug in the Metal Shader Converter caused it to mis-translate this operation. -# Version 3 fixes this issue. 
-# UNSUPPORTED: Clang-Metal && !metal-shaderconverter-3.0.0-or-later - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/isinf.32.test b/test/Feature/HLSLLib/isinf.32.test deleted file mode 100644 index acef3f8b..00000000 --- a/test/Feature/HLSLLib/isinf.32.test +++ /dev/null @@ -1,63 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = isinf(In[0]); - bool4 Tmp = {isinf(In[0].xyz), isinf(In[0].w)}; - Out[1] = Tmp; - Out[2].xy = isinf(In[0].xy); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [inf, -inf, 1.0, nan] # Inf, -Inf, 1, Nan - - Name: Out - Format: Bool - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Bool - Stride: 16 - Data: [1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0] -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# llvm/llvm-project#141089 -# XFAIL: Clang-Vulkan - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/ldexp.16.test b/test/Feature/HLSLLib/ldexp.16.test deleted file mode 100644 index a96d9a47..00000000 --- a/test/Feature/HLSLLib/ldexp.16.test +++ /dev/null @@ -1,79 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - // X Exp - // ------------------ - // 0x4248 0x8000 - // 0x0000 0x3C00 - // 0xFC00 0xB800 - // 0x7E00 0x7C00 - // 0x8000 0x3C00 - // 0x3C00 0x0000 - // 0xB800 0xFC00 - // 0x7C00 0x7E00 - // 0x4248 0xB800 - // 0x0000 0x7C00 - // 0x3C00 0x4C00 (overflow) - // 0x3C00 0xCE40 (underflow) - Out[0] = ldexp(In[0], In[1]); - Out[1].x = ldexp(In[1].x, In[1].y); - Out[1].yzw = ldexp(In[1].yzw, In[0].yzw); - Out[2].xy = ldexp(In[0].xy, In[1].zw); - Out[2].zw = ldexp(In[2].xy, In[2].zw); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x4248, 0x0000, 0xFC00, 0x7E00, 0x8000, 0x3C00, 0xB800, 0x7C00, 0x3C00, 0x3C00, 0x4C00, 0xCE40 ] # [ 3.140625, 0, -inf, NaN, -0, 1, -0.5, inf, 1, 1, 16, -25 ] - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x4248, 0x0000, 0xFC00, 0x7E00, 0x8000, 0x3C00, 0x8000, 0x7E00, 0x4071, 0x7E00, 0x7C00, 0x0000 ] # [ 3.140625, 0, -inf, NaN, -0, 1, -0, NaN, 2.220703, NaN, inf, 0 ] -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 1 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 
1 -... -#--- end - -# REQUIRES: Half - -# UNSUPPORTED: Clang-Vulkan -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/ldexp.32.test b/test/Feature/HLSLLib/ldexp.32.test deleted file mode 100644 index c7bc9326..00000000 --- a/test/Feature/HLSLLib/ldexp.32.test +++ /dev/null @@ -1,77 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - // X Exp - // ------------------ - // 3.14159 -0 - // 0 1 - // -inf -0.5 - // NaN inf - // -0 1 - // 1 0 - // -0.5 -inf - // inf NaN - // 3.14159 -0.5 - // 0 inf - // 1 128 (overflow) - // 1 -150 (underflow) - Out[0] = ldexp(In[0], In[1]); - Out[1].x = ldexp(In[1].x, In[1].y); - Out[1].yzw = ldexp(In[1].yzw, In[0].yzw); - Out[2].xy = ldexp(In[0].xy, In[1].zw); - Out[2].zw = ldexp(In[2].xy, In[2].zw); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ 3.14159, 0, -inf, NaN, -0, 1, -0.5, inf, 1, 1, 128, -150 ] - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ 3.14159, 0, -inf, NaN, -0, 1, -0, NaN, 2.2214396, NaN, inf, 0 ] -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 1 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# UNSUPPORTED: Clang-Vulkan -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/length.16.test b/test/Feature/HLSLLib/length.16.test deleted file mode 100644 index 2b87f722..00000000 --- a/test/Feature/HLSLLib/length.16.test +++ /dev/null @@ -1,65 +0,0 @@ -#--- source.hlsl -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - - -[numthreads(1,1,1)] -void main() { - Out[0] = length(In[0]); - Out[1] = length(In[1].x); - Out[2] = length(In[1].yzw); - Out[3] = length(In[2].xy); - Out[4] = length(In[2].zw); - Out[5] = length(half4(4, 4, 4, 4)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x4400, 0x4400, 0x4400, 0x4400, 0x4248, 0x0000, 0x4500, 0x4a00, 0x0000, 0x4500, 0x4000, 0x4200 ] - # 4, 4, 4, 4, 3.14159, 0, 5, 12, 0, 5, 2, 3 - - Name: Out - Format: Float16 - Stride: 2 - ZeroInitSize: 12 - - Name: ExpectedOut - Format: Float16 - Stride: 2 - Data: [ 0x4800, 0x4248, 0x4a80, 0x4500, 0x4336, 0x4800 ] - # 8, 3.14159, 13, 5, 3.60555, 8 -Results: - - Result: Test0 - Rule: BufferFloatULP - ULPT: 1 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/length.32.test b/test/Feature/HLSLLib/length.32.test deleted file mode 100644 index c6c39c51..00000000 --- a/test/Feature/HLSLLib/length.32.test +++ /dev/null @@ -1,63 +0,0 @@ -#--- source.hlsl -StructuredBuffer In : 
register(t0); - -RWStructuredBuffer Out : register(u1); - - -[numthreads(1,1,1)] -void main() { - Out[0] = length(In[0]); - Out[1] = length(In[1].x); - Out[2] = length(In[1].yzw); - Out[3] = length(In[2].xy); - Out[4] = length(In[2].zw); - Out[5] = length(float4(4, 4, 4, 4)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ 4, 4, 4, 4, 3.14159, 0, 5, 12, 0, 5, 2, 3 ] - - Name: Out - Format: Float32 - Stride: 4 - ZeroInitSize: 24 - - Name: ExpectedOut - Format: Float32 - Stride: 4 - Data: [ 8, 3.14159, 13, 5, 3.60555, 8 ] -Results: - - Result: Test0 - Rule: BufferFloatULP - ULPT: 5 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/lerp.16.test b/test/Feature/HLSLLib/lerp.16.test deleted file mode 100644 index a5401008..00000000 --- a/test/Feature/HLSLLib/lerp.16.test +++ /dev/null @@ -1,92 +0,0 @@ -#--- source.hlsl -StructuredBuffer X : register(t0); -StructuredBuffer Y : register(t1); -StructuredBuffer S : register(t2); - -RWStructuredBuffer Out : register(u3); - - -[numthreads(1,1,1)] -void main() { - Out[0] = lerp(X[0], Y[0], S[0]); - Out[1] = half4(lerp(X[1].xyz, Y[1].xyz, S[1].xyz), lerp(X[1].w, Y[1].w, S[1].w)); - Out[2] = half4(lerp(X[2].xy, Y[2].xy, S[2].xy), lerp(X[2].zw, Y[2].zw, S[2].zw)); - Out[3] = lerp(half4(1, 2, -3, 4), half4(8, -7, 6, 5), half4(0.25, 0.5, 0.75, 0)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X - Format: Float16 - Stride: 8 - Data: [ 0x3c00, 0x4000, 0xc200, 0x4400, 0x0000, 0x4d13, 0x4fa0, 0x4500, 0xc500, 0xcc00, 0x3c00, 0x4000 ] - # 1, 2, -3, 4, 0, 20.3, 30.5, 5, -5, -16, 1, 2 - - Name: Y - Format: Float16 - Stride: 8 - Data: [ 0x4800, 0xc700, 0x4600, 0x4500, 0x0000, 0x538a, 0x4fa0, 0x4b80, 0xcb80, 0xc800, 0x5640, 0x5a40 ] - # 8, -7, 6, 5, 0, 60.3, 30.5, 15, -15, -8, 100, 200 - - Name: S - Format: Float16 - Stride: 8 - Data: [ 0x3400, 0x3800, 0x3a00, 0x0000, 0x3800, 0x3666, 0x3a66, 0x3a66, 0x3666, 0x38cd, 0x2e66, 0x3b33 ] - # 0.25, 0.5, 0.75, 0, 0.5, 0.4, 0.8, 0.8, 0.4, 0.6, 0.1, 0.9 - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut - Format: Float16 - Stride: 8 - Data: [ 0x4180, 0xc100, 0x4380, 0x4400, 0x0000, 0x508a, 0x4fa0, 0x4a80, 0xc880, 0xc99a, 0x4973, 0x59a2, 0x4180, 0xc100, 0x4380, 0x4400 ] - # 2.75, -2.5, 3.75, 4, 0, 36.3, 30.5, 13, -9, -11.2, 10.9, 180.2, 2.75, -2.5, 3.75, 4 -Results: - - Result: Test0 - Rule: BufferFloatULP - ULPT: 1 - Actual: Out - Expected: 
ExpectedOut -DescriptorSets: - - Resources: - - Name: X - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: S - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -#--- end - -# Bug https://github.com/microsoft/DirectXShaderCompiler/issues/7710 -# XFAIL: DXC-Vulkan - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/lerp.32.test b/test/Feature/HLSLLib/lerp.32.test deleted file mode 100644 index ee50f67e..00000000 --- a/test/Feature/HLSLLib/lerp.32.test +++ /dev/null @@ -1,84 +0,0 @@ -#--- source.hlsl -StructuredBuffer X : register(t0); -StructuredBuffer Y : register(t1); -StructuredBuffer S : register(t2); - -RWStructuredBuffer Out : register(u3); - - -[numthreads(1,1,1)] -void main() { - Out[0] = lerp(X[0], Y[0], S[0]); - Out[1] = float4(lerp(X[1].xyz, Y[1].xyz, S[1].xyz), lerp(X[1].w, Y[1].w, S[1].w)); - Out[2] = float4(lerp(X[2].xy, Y[2].xy, S[2].xy), lerp(X[2].zw, Y[2].zw, S[2].zw)); - Out[3] = lerp(float4(1, 2, -3, 4), float4(8, -7, 6, 5), float4(0.25, 0.5, 0.75, 0)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X - Format: Float32 - Stride: 16 - Data: [ 1, 2, -3, 4, 0, 20.3, 30.5, 5, -5, -16, 1, 2 ] - - Name: Y - Format: Float32 - Stride: 16 - Data: [ 8, -7, 6, 5, 0, 60.3, 30.5, 15, -15, -8, 100, 200 ] - - Name: S - Format: Float32 - Stride: 16 - Data: [ 0.25, 0.5, 0.75, 0, 0.5, 0.4, 0.8, 0.8, 0.4, 0.6, 0.1, 0.9 ] - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut - Format: Float32 - 
Stride: 16 - Data: [ 2.75, -2.5, 3.75, 4, 0, 36.3, 30.5, 13, -9, -11.2, 10.9, 180.2, 2.75, -2.5, 3.75, 4 ] -Results: - - Result: Test0 - Rule: BufferFloatULP - ULPT: 1 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: X - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: S - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/log.16.test b/test/Feature/HLSLLib/log.16.test deleted file mode 100644 index e6b1c1b6..00000000 --- a/test/Feature/HLSLLib/log.16.test +++ /dev/null @@ -1,72 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = log(In[0]); - half4 Tmp = {log(In[1].xyz), log(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {log(In[2].xy), log(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x0001, 0x7c00, 0xbc00, 0x4170, 0x4764, 0x5640, 0x7e00,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, -1, 2.718281828, 7.389056, 100, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0x7e00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0x7c00, 0x7e00, 0x3c00, 0x4000, 0x449b, 0x7e00,] - # NaN, NaN, -Inf, -Inf, -Inf, -Inf, Inf, NaN, 1.0, 1.99999998, 4.6051701, -Results: - - Result: Test1 
- Rule: BufferFloatULP - ULPT: 2 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - -# 16bit-hlk-issue-7564 -# no hlk tests for 16 bit log so unsure of correct answers for some -# https://github.com/microsoft/DirectXShaderCompiler/issues/7564 -# XFAIL: DXC -# XFAIL: Clang -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/log.32.test b/test/Feature/HLSLLib/log.32.test deleted file mode 100644 index edae3aa2..00000000 --- a/test/Feature/HLSLLib/log.32.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = log(In[0]); - float4 Tmp = {log(In[1].xyz), log(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {log(In[2].xy), log(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, inf, -1, 2.718281828, 7.389056, 100, nan,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, -1, 2.718281828, 7.389056, 100, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, nan, -inf, -inf, -inf, -inf, inf, nan, 1.0, 1.99999998, 4.6051701, nan,] - # NaN, NaN, -Inf, -Inf, -Inf, -Inf, Inf, NaN, 1.0, 1.99999998, 4.6051701, -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 2 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - 
DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/log10.16.test b/test/Feature/HLSLLib/log10.16.test deleted file mode 100644 index b332a102..00000000 --- a/test/Feature/HLSLLib/log10.16.test +++ /dev/null @@ -1,69 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = log10(In[0]); - half4 Tmp = {log10(In[1].xyz), log10(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {log10(In[2].xy), log10(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x0001, 0x7c00, 0x3c00, 0xbc00, 0x4900, 0x2e66, 0x5640,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1, -1, 10, 0.1, 100, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0x7e00, 0x7e00, 0xfc00, 0xfc00, 0xc73a, 0x7c00, 0x0000, 0x7e00, 0x3c00, 0xbc00, 0x4000,] - # NaN, NaN, NaN, -inf, -Inf, -7.2265, inf, 0, nan, 1, -1, 2, -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 2 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# https://github.com/llvm/llvm-project/issues/145073 -# XFAIL: Clang-Vulkan -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/log10.32.test b/test/Feature/HLSLLib/log10.32.test deleted file mode 100644 index 78f30152..00000000 --- a/test/Feature/HLSLLib/log10.32.test +++ /dev/null @@ -1,66 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = log10(In[0]); - float4 Tmp = {log10(In[1].xyz), log10(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {log10(In[2].xy), log10(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, inf, 1, -1, 10, 0.1, 100,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1, -1, 10, 0.1, 100, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, nan, -inf, -inf, -inf, -inf, inf, 0, nan, 1, -1, 2,] -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 2 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/log2.16.test b/test/Feature/HLSLLib/log2.16.test deleted file mode 100644 index 00718ba1..00000000 --- a/test/Feature/HLSLLib/log2.16.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = log2(In[0]); - half4 Tmp = {log2(In[1].xyz), log2(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {log2(In[2].xy), log2(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x0001, 0x7c00, 0x3c00, 0xbc00, 0x4400, 0x3400, 0x6800,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1, -1, 4, 1/4, 2048, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0x7e00, 0x7e00, 0xfc00, 0xfc00, 0xcdff, 0x7c00, 0x0000, 0x7e00, 0x4000, 0xc000, 0x4980,] - # NaN, NaN, NaN, -Inf, -Inf, -23.984375, inf, 0, NaN, 2, -2, 11, -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 2 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/log2.32.test b/test/Feature/HLSLLib/log2.32.test deleted file mode 100644 index fe1095e7..00000000 --- a/test/Feature/HLSLLib/log2.32.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = log2(In[0]); - float4 Tmp = {log2(In[1].xyz), log2(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {log2(In[2].xy), log2(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, inf, 1, -1, 4, 0.25, 2048,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1, -1, 4, 1/4, 2048, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, nan, -inf, -inf, -inf, -inf, inf, 0, nan, 2, -2, 11,] - # NaN, NaN, NaN, Inf, Inf, Inf, NaN, 0, NaN, 2, -2, 11, -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 2 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/mad.32.test b/test/Feature/HLSLLib/mad.32.test deleted file mode 100644 index 23b5299f..00000000 --- a/test/Feature/HLSLLib/mad.32.test +++ /dev/null @@ -1,218 +0,0 @@ -#--- source.hlsl -StructuredBuffer M0 : register(t0); -StructuredBuffer A0 : register(t1); -StructuredBuffer B0 : register(t2); -StructuredBuffer M1 : register(t3); -StructuredBuffer A1 : register(t4); -StructuredBuffer B1 : register(t5); -StructuredBuffer M2 : register(t6); -StructuredBuffer A2 : register(t7); -StructuredBuffer B2 : register(t8); - -RWStructuredBuffer Out0 : register(u9); -RWStructuredBuffer Out1 : register(u10); -RWStructuredBuffer Out2 : register(u11); - - -[numthreads(1,1,1)] -void main() { - // float - Out0[0] = mad(M0[0], A0[0], B0[0]); - Out0[1] = float4(mad(M0[1].xyz, A0[1].xyz, B0[1].xyz), mad(M0[1].w, A0[1].w, B0[1].w)); - Out0[2] = float4(mad(M0[2].xy, A0[2].xy, B0[2].xy), mad(M0[2].zw, A0[2].zw, B0[2].zw)); - Out0[3] = mad(float4(1.0, 1.5, 1e+38, -1e+38), float4(1.0, 10, 4, 4), float4(1.0, -5.5, 0, 0)); - - // int - Out1[0] = mad(M1[0], A1[0], B1[0]); - Out1[1] = int4(mad(M1[1].xyz, A1[1].xyz, B1[1].xyz), mad(M1[1].w, A1[1].w, B1[1].w)); - Out1[2] = int4(mad(M1[2].xy, A1[2].xy, B1[2].xy), mad(M1[2].zw, A1[2].zw, B1[2].zw)); - Out1[3] = mad(int4(-2147483647, -256, 2147483647, -2147483648), int4(1, -256, 1, 1), int4(0, 0, 1, -1)); - - // uint - Out2[0] = mad(M2[0], A2[0], B2[0]); - Out2[1] = uint4(mad(M2[1].xyz, A2[1].xyz, B2[1].xyz), mad(M2[1].w, A2[1].w, B2[1].w)); - Out2[2] = uint4(mad(M2[2].xy, A2[2].xy, B2[2].xy), mad(M2[2].zw, A2[2].zw, B2[2].zw)); - Out2[3] = mad(uint4(2, 16, 65536, 4294967295), uint4(2, 16, 65536, 1), uint4(1, 15, 1, 1)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: M0 - Format: Float32 - Stride: 16 - Data: [ 
NaN, -Inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, Inf, 1.0, -1.0, 0, 1, 1.5 ] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1.0, -1.0, 0, 1, 1.5 - - Name: A0 - Format: Float32 - Stride: 16 - Data: [ NaN, -Inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, Inf, 1.0, -1.0, 0, 1, 10 ] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1.0, -1.0, 0, 1, 10 - - Name: B0 - Format: Float32 - Stride: 16 - Data: [ NaN, -Inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, Inf, 1.0, -1.0, 1, 0, -5.5 ] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1.0, -1.0, 1, 0, -5.5 - - Name: M1 - Format: Int32 - Stride: 16 - Data: [ -2147483647, -256, -1, 0, 1, 2, 16, 2147483647, 1, -1, 1, 10 ] - - Name: A1 - Format: Int32 - Stride: 16 - Data: [ 1, -256, -1, 0, 1, 3, 16, 0, 1, -1, 10, 100 ] - - Name: B1 - Format: Int32 - Stride: 16 - Data: [ 0, 0, 0, 0, 1, 3, 1, 255, 2147483646, -2147483647, -10, -2000 ] - - Name: M2 - Format: UInt32 - Stride: 16 - Data: [ 0, 1, 2, 16, 2147483647, 0, 10, 0, 100, 1000, 65536, 4294967295 ] - - Name: A2 - Format: UInt32 - Stride: 16 - Data: [ 0, 1, 2, 16, 1, 0, 10, 1, 2, 5, 65536, 1 ] - - Name: B2 - Format: UInt32 - Stride: 16 - Data: [ 0, 0, 1, 15, 0, 10, 10, 1, 50, 100, 1, 1 ] - - Name: Out0 - Format: Float32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut0 - Format: Float32 - Stride: 16 - Data: [ NaN, NaN, 0, 0, 0, 0, Inf, 2, 0, 1, 1, 9.5, 2, 9.5, Inf, -Inf ] - - Name: Out1 - Format: Int32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut1 - Format: Int32 - Stride: 16 - Data: [ -2147483647, 65536, 1, 0, 2, 9, 257, 255, 2147483647, -2147483646, 0, -1000, -2147483647, 65536, -2147483648, 2147483647 ] - - Name: Out2 - Format: UInt32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut2 - Format: UInt32 - Stride: 16 - Data: [ 0, 1, 5, 271, 2147483647, 10, 110, 1, 250, 5100, 1, 0, 5, 271, 1, 0 ] -Results: - - Result: Test0 - Rule: BufferFloatULP - ULPT: 1 - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: 
ExpectedOut1 - - Result: Test2 - Rule: BufferExact - Actual: Out2 - Expected: ExpectedOut2 -DescriptorSets: - - Resources: - - Name: M0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: A0 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: B0 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: M1 - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: A1 - Kind: StructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: B1 - Kind: StructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 - - Name: M2 - Kind: StructuredBuffer - DirectXBinding: - Register: 6 - Space: 0 - VulkanBinding: - Binding: 6 - - Name: A2 - Kind: StructuredBuffer - DirectXBinding: - Register: 7 - Space: 0 - VulkanBinding: - Binding: 7 - - Name: B2 - Kind: StructuredBuffer - DirectXBinding: - Register: 8 - Space: 0 - VulkanBinding: - Binding: 8 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 9 - Space: 0 - VulkanBinding: - Binding: 9 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 10 - Space: 0 - VulkanBinding: - Binding: 10 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 11 - Space: 0 - VulkanBinding: - Binding: 11 -#--- end - -# https://github.com/llvm/llvm-project/issues/140095 -# UNSUPPORTED: Clang-Vulkan - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7706 -# XFAIL: DXC-Vulkan - -# RUN: split-file %s %t -# RUN: %dxc_target -Gis -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/mad.fp16.test b/test/Feature/HLSLLib/mad.fp16.test deleted file mode 100644 index ea31b98e..00000000 --- a/test/Feature/HLSLLib/mad.fp16.test +++ /dev/null @@ -1,90 +0,0 @@ 
-#--- source.hlsl -StructuredBuffer M : register(t0); -StructuredBuffer A : register(t1); -StructuredBuffer B : register(t2); - -RWStructuredBuffer Out : register(u3); - - -[numthreads(1,1,1)] -void main() { - Out[0] = mad(M[0], A[0], B[0]); - Out[1] = half4(mad(M[1].xyz, A[1].xyz, B[1].xyz), mad(M[1].w, A[1].w, B[1].w)); - Out[2] = half4(mad(M[2].xy, A[2].xy, B[2].xy), mad(M[2].zw, A[2].zw, B[2].zw)); - Out[3] = mad(half4(1, 1.5, 300, -300), half4(1, 10, 300, 300), half4(1, -5.5, 1, -1)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: M - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x03FF, 0x8000, 0x0000, 0x7c00, 0x3c00, 0xbc00, 0x0000, 0x3c00, 0x3e00, 0xc300 ] - # NaN, -Inf, denorm, -0, 0, Inf, 1, -1, 0, 1, 1.5, -3.5 - - Name: A - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x3c00, 0x8000, 0x0000, 0x7c00, 0x3c00, 0xbc00, 0x0000, 0x3c00, 0x4900, 0x4500 ] - # NaN, -Inf, 1, -0, 0, Inf, 1, -1, 0, 1, 10, 5 - - Name: B - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x03FF, 0x8000, 0x0000, 0x7c00, 0x3c00, 0xbc00, 0x3c00, 0x0000, 0xc580, 0x3c00 ] - # NaN, -Inf, denorm, -0, 0, Inf, 1, -1, 1, 0, -5.5, 1 - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0x7e00, 0x07FE, 0x0000, 0x0000, 0x7c00, 0x4000, 0, 0x3c00, 0x3c00, 0x48c0, 0xcc20, 0x4000, 0x48c0, 0x7c00, 0xfc00 ] - # NaN, NaN, 0.00012195110, 0, 0, Inf, 2, 0, 1, 1, 9.5, -16.5, 2, 9.5, Inf, -Inf -Results: - - Result: Test0 - Rule: BufferFloatULP - ULPT: 1 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: M - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: A - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: B - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - 
Binding: 2 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -Gis -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl - -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/mad.fp64.test b/test/Feature/HLSLLib/mad.fp64.test deleted file mode 100644 index 6bffe8c1..00000000 --- a/test/Feature/HLSLLib/mad.fp64.test +++ /dev/null @@ -1,92 +0,0 @@ -#--- source.hlsl -StructuredBuffer M : register(t0); -StructuredBuffer A : register(t1); -StructuredBuffer B : register(t2); - -RWStructuredBuffer Out : register(u3); - - -[numthreads(1,1,1)] -void main() { - Out[0] = mad(M[0], A[0], B[0]); - Out[1] = double4(mad(M[1].xyz, A[1].xyz, B[1].xyz), mad(M[1].w, A[1].w, B[1].w)); - Out[2] = double4(mad(M[2].xy, A[2].xy, B[2].xy), mad(M[2].zw, A[2].zw, B[2].zw)); - Out[3] = mad(double4(1.0, 1.5, 1e+308l, -1e+308l), double4(1.0, 10, 2, 2), double4(1.0, -5.5, 0, 0)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: M - Format: Float64 - Stride: 32 - Data: [ NaN, -Inf, 0x0.fffffffffffffp-1022, -0, 0, Inf, 1.0, -1.0, 0, 1, 1.5, -3.5 ] - # NaN, -Inf, denorm, -0, 0, Inf, 1.0, -1.0, 0, 1, 1.5, -3.5 - - Name: A - Format: Float64 - Stride: 32 - Data: [ NaN, -Inf, 1, -0, 0, Inf, 1.0, -1.0, 0, 1, 10, 5 ] - # NaN, -Inf, 1, -0, 0, Inf, 1.0, -1.0, 0, 1, 10, 5 - - Name: B - Format: Float64 - Stride: 32 - Data: [ NaN, -Inf, 0, -0, 0, Inf, 1.0, -1.0, 1, 0, -5.5, 1 ] - # NaN, -Inf, 0, -0, 0, Inf, 1.0, -1.0, 1, 0, -5.5, 1 - - Name: Out - Format: Float64 - Stride: 32 - ZeroInitSize: 128 - - Name: ExpectedOut - Format: Float64 - Stride: 32 - Data: [ NaN, NaN, 0x0.fffffffffffffp-1022, 0, 0, Inf, 2, 0, 1, 1, 9.5, -16.5, 2, 9.5, Inf, -Inf ] -Results: - - Result: Test0 - Rule: BufferFloatULP - ULPT: 1 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - 
Resources: - - Name: M - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: A - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: B - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -#--- end - -# https://github.com/llvm/offload-test-suite/issues/358 -# XFAIL: DirectX-WARP - -# REQUIRES: Double -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -Gis -T cs_6_5 -Fo %t.o %t/source.hlsl - -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/mad.int16.test b/test/Feature/HLSLLib/mad.int16.test deleted file mode 100644 index adc2206d..00000000 --- a/test/Feature/HLSLLib/mad.int16.test +++ /dev/null @@ -1,151 +0,0 @@ -#--- source.hlsl -StructuredBuffer M0 : register(t0); -StructuredBuffer A0 : register(t1); -StructuredBuffer B0 : register(t2); -StructuredBuffer M1 : register(t3); -StructuredBuffer A1 : register(t4); -StructuredBuffer B1 : register(t5); - -RWStructuredBuffer Out0 : register(u6); -RWStructuredBuffer Out1 : register(u7); - - -[numthreads(1,1,1)] -void main() { - // int16_t - Out0[0] = mad(M0[0], A0[0], B0[0]); - Out0[1] = int16_t4(mad(M0[1].xyz, A0[1].xyz, B0[1].xyz), mad(M0[1].w, A0[1].w, B0[1].w)); - Out0[2] = int16_t4(mad(M0[2].xy, A0[2].xy, B0[2].xy), mad(M0[2].zw, A0[2].zw, B0[2].zw)); - Out0[3] = mad(int16_t4(-32768, -256, 32767, -32768), int16_t4(1, 8, 1, 1), int16_t4(0, 0, 1, -1)); - - // uint16_t - Out1[0] = mad(M1[0], A1[0], B1[0]); - Out1[1] = uint16_t4(mad(M1[1].xyz, A1[1].xyz, B1[1].xyz), mad(M1[1].w, A1[1].w, B1[1].w)); - Out1[2] = uint16_t4(mad(M1[2].xy, A1[2].xy, B1[2].xy), mad(M1[2].zw, A1[2].zw, B1[2].zw)); - Out1[3] = mad(uint16_t4(2, 16, 256, 65535), uint16_t4(2, 16, 256, 1), uint16_t4(1, 15, 1, 1)); -} -//--- pipeline.yaml - ---- -Shaders: - - 
Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: M0 - Format: Int16 - Stride: 8 - Data: [ -32768, -256, -1, 0, 1, 2, 16, 32767, 257, -1, 32767, -32768 ] - - Name: A0 - Format: Int16 - Stride: 8 - Data: [ 1, 8, -1, 0, 1, 3, 16, 1, 12, -1, 1, 1 ] - - Name: B0 - Format: Int16 - Stride: 8 - Data: [ 0, 0, 1, 3, 250, -30, -32768, -50, 10, 0, 1, -1 ] - - Name: M1 - Format: UInt16 - Stride: 8 - Data: [ 0, 1, 2, 16, 32767, 0, 10, 0, 100, 1000, 256, 65535 ] - - Name: A1 - Format: UInt16 - Stride: 8 - Data: [ 0, 1, 2, 16, 1, 0, 10, 1, 2, 5, 256, 1 ] - - Name: B1 - Format: UInt16 - Stride: 8 - Data: [ 0, 0, 1, 15, 0, 10, 10, 1, 50, 100, 1, 1 ] - - Name: Out0 - Format: Int16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut0 - Format: Int16 - Stride: 8 - Data: [ -32768, -2048, 2, 3, 251, -24, -32512, 32717, 3094, 1, -32768, 32767, -32768, -2048, -32768, 32767 ] - - Name: Out1 - Format: UInt16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut1 - Format: UInt16 - Stride: 8 - Data: [ 0, 1, 5, 271, 32767, 10, 110, 1, 250, 5100, 1, 0, 5, 271, 1, 0 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 -DescriptorSets: - - Resources: - - Name: M0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: A0 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: B0 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: M1 - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: A1 - Kind: StructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: B1 - Kind: StructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 - - Name: Out0 - Kind: RWStructuredBuffer - 
DirectXBinding: - Register: 6 - Space: 0 - VulkanBinding: - Binding: 6 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 7 - Space: 0 - VulkanBinding: - Binding: 7 -#--- end - -# https://github.com/llvm/llvm-project/issues/140095 -# UNSUPPORTED: Clang-Vulkan - -# REQUIRES: Int16 -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl - -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/mad.int64.test b/test/Feature/HLSLLib/mad.int64.test deleted file mode 100644 index 31c53422..00000000 --- a/test/Feature/HLSLLib/mad.int64.test +++ /dev/null @@ -1,151 +0,0 @@ -#--- source.hlsl -StructuredBuffer M0 : register(t0); -StructuredBuffer A0 : register(t1); -StructuredBuffer B0 : register(t2); -StructuredBuffer M1 : register(t3); -StructuredBuffer A1 : register(t4); -StructuredBuffer B1 : register(t5); - -RWStructuredBuffer Out0 : register(u6); -RWStructuredBuffer Out1 : register(u7); - - -[numthreads(1,1,1)] -void main() { - // int64_t - Out0[0] = mad(M0[0], A0[0], B0[0]); - Out0[1] = int64_t4(mad(M0[1].xyz, A0[1].xyz, B0[1].xyz), mad(M0[1].w, A0[1].w, B0[1].w)); - Out0[2] = int64_t4(mad(M0[2].xy, A0[2].xy, B0[2].xy), mad(M0[2].zw, A0[2].zw, B0[2].zw)); - Out0[3] = mad(int64_t4(-9223372036854775807, -256, 9223372036854775807, -4611686018427387904), int64_t4(1, -512, 1, 2), int64_t4(0, 0, 1, -1)); - - // uint64_t - Out1[0] = mad(M1[0], A1[0], B1[0]); - Out1[1] = uint64_t4(mad(M1[1].xyz, A1[1].xyz, B1[1].xyz), mad(M1[1].w, A1[1].w, B1[1].w)); - Out1[2] = uint64_t4(mad(M1[2].xy, A1[2].xy, B1[2].xy), mad(M1[2].zw, A1[2].zw, B1[2].zw)); - Out1[3] = mad(uint64_t4(2, 16, 4294967296, 6148914691236517205), uint64_t4(2, 16, 4294967296, 3), uint64_t4(1, 15, 1, 1)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: M0 - Format: Int64 - Stride: 8 - Data: [ -9223372036854775807, -256, -1, 0, 1, 2, 16, 
9223372036854775807, 1, -1, 1, 10 ] - - Name: A0 - Format: Int64 - Stride: 8 - Data: [ 1, -512, -1, 0, 1, 3, 16, 0, 1, -1, 10, 100 ] - - Name: B0 - Format: Int64 - Stride: 8 - Data: [ 0, 0, 0, 0, 1, 3, 1, 255, 9223372036854775806, -9223372036854775808, -10, -2000 ] - - Name: M1 - Format: UInt64 - Stride: 8 - Data: [ 0, 1, 2, 16, 9223372036854775807, 0, 10, 0, 100, 1000, 4294967296, 18446744073709551615 ] - - Name: A1 - Format: UInt64 - Stride: 8 - Data: [ 0, 1, 2, 16, 1, 0, 10, 1, 2, 5, 4294967296, 1 ] - - Name: B1 - Format: UInt64 - Stride: 8 - Data: [ 0, 0, 1, 15, 0, 10, 10, 1, 50, 100, 1, 1 ] - - Name: Out0 - Format: Int64 - Stride: 32 - ZeroInitSize: 128 - - Name: ExpectedOut0 - Format: Int64 - Stride: 32 - Data: [ -9223372036854775807, 131072, 1, 0, 2, 9, 257, 255, 9223372036854775807, -9223372036854775807, 0, -1000, -9223372036854775807, 131072, -9223372036854775808, 9223372036854775807 ] - - Name: Out1 - Format: UInt64 - Stride: 32 - ZeroInitSize: 128 - - Name: ExpectedOut1 - Format: UInt64 - Stride: 32 - Data: [ 0, 1, 5, 271, 9223372036854775807, 10, 110, 1, 250, 5100, 1, 0, 5, 271, 1, 0 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 -DescriptorSets: - - Resources: - - Name: M0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: A0 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: B0 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: M1 - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: A1 - Kind: StructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: B1 - Kind: StructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 - - Name: Out0 - 
Kind: RWStructuredBuffer - DirectXBinding: - Register: 6 - Space: 0 - VulkanBinding: - Binding: 6 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 7 - Space: 0 - VulkanBinding: - Binding: 7 -#--- end - -# https://github.com/llvm/llvm-project/issues/140095 -# UNSUPPORTED: Clang-Vulkan - -# REQUIRES: Int64 -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl - -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/max.32.test b/test/Feature/HLSLLib/max.32.test deleted file mode 100644 index 56d38139..00000000 --- a/test/Feature/HLSLLib/max.32.test +++ /dev/null @@ -1,174 +0,0 @@ -#--- source.hlsl -StructuredBuffer X0 : register(t0); -StructuredBuffer Y0 : register(t1); -StructuredBuffer X1 : register(t2); -StructuredBuffer Y1 : register(t3); -StructuredBuffer X2 : register(t4); -StructuredBuffer Y2 : register(t5); - -RWStructuredBuffer Out0 : register(u6); -RWStructuredBuffer Out1 : register(u7); -RWStructuredBuffer Out2 : register(u8); - - -[numthreads(1,1,1)] -void main() { - // float - Out0[0] = max(X0[0], Y0[0]); - Out0[1] = float4(max(X0[1].xyz, Y0[1].xyz), max(X0[1].w, Y0[1].w)); - Out0[2] = float4(max(X0[2].xy, Y0[2].xy), max(X0[2].zw, Y0[2].zw)); - Out0[3] = max(half4(1.0, -1.0, 31408, -415), half4(-1.0, 1.0, 1.5, 129.5)); - - // int - Out1[0] = max(X1[0], Y1[0]); - Out1[1] = int4(max(X1[1].xyz, Y1[1].xyz), max(X1[1].w, Y1[1].w)); - Out1[2] = int4(max(X1[2].xy, Y1[2].xy), max(X1[2].zw, Y1[2].zw)); - Out1[3] = max(int4(-2147483648, -10, 10, 2147483647), int4(0, 10, 10, 0)); - - // uint - Out2[0] = max(X2[0], Y2[0]); - Out2[1] = uint4(max(X2[1].xyz, Y2[1].xyz), max(X2[1].w, Y2[1].w)); - Out2[2] = uint4(max(X2[2].xy, Y2[2].xy), max(X2[2].zw, Y2[2].zw)); - Out2[3] = max(uint4(0, 0, 10, 10000), uint4(0, 256, 4, 10001)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X0 - Format: Float32 - Stride: 16 - Data: [ 1.0, 
-1.0, 31408, -415, 3.14159, 42, -123.456, 0.0001, -10, 10.5, 3e+38, 0 ] - - Name: Y0 - Format: Float32 - Stride: 16 - Data: [ -1.0, 1.0, 1.5, 129.5, 2.71828, 42, -654.321, 0.0002, 10, 10.5, 0, -3e+38 ] - - Name: X1 - Format: Int32 - Stride: 16 - Data: [ -2147483648, -10, 0, 0, 10, 2147483647, 1000, 2500, 1, -1, 512, -2048 ] - - Name: Y1 - Format: Int32 - Stride: 16 - Data: [ 0, 10, -10, 10, 10, 0, 1500, 2000, -1, 1, 511, -2047 ] - - Name: X2 - Format: UInt32 - Stride: 16 - Data: [ 0, 0, 10, 10000, 2147483647, 4294967295, 1000, 2500, 1, 256, 512, 2048 ] - - Name: Y2 - Format: UInt32 - Stride: 16 - Data: [ 0, 256, 4, 10001, 0, 4294967295, 1500, 2000, 0, 200, 511, 2047 ] - - Name: Out0 - Format: Float32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut0 - Format: Float32 - Stride: 16 - Data: [ 1.0, 1.0, 31408, 129.5, 3.14159, 42, -123.456, 0.0002, 10, 10.5, 3e+38, 0, 1.0, 1.0, 31408, 129.5 ] - - Name: Out1 - Format: Int32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut1 - Format: Int32 - Stride: 16 - Data: [ 0, 10, 0, 10, 10, 2147483647, 1500, 2500, 1, 1, 512, -2047, 0, 10, 10, 2147483647 ] - - Name: Out2 - Format: UInt32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut2 - Format: UInt32 - Stride: 16 - Data: [ 0, 256, 10, 10001, 2147483647, 4294967295, 1500, 2500, 1, 256, 512, 2048, 0, 256, 10, 10001 ] -Results: - - Result: Test0 - Rule: BufferFloatEpsilon - Epsilon: 0 - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 - - Result: Test2 - Rule: BufferExact - Actual: Out2 - Expected: ExpectedOut2 -DescriptorSets: - - Resources: - - Name: X0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y0 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: X1 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Y1 - Kind: 
StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: X2 - Kind: StructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Y2 - Kind: StructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 6 - Space: 0 - VulkanBinding: - Binding: 6 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 7 - Space: 0 - VulkanBinding: - Binding: 7 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 8 - Space: 0 - VulkanBinding: - Binding: 8 -#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/max.fp16.test b/test/Feature/HLSLLib/max.fp16.test deleted file mode 100644 index de695c25..00000000 --- a/test/Feature/HLSLLib/max.fp16.test +++ /dev/null @@ -1,76 +0,0 @@ -#--- source.hlsl -StructuredBuffer X : register(t0); -StructuredBuffer Y : register(t1); - -RWStructuredBuffer Out : register(u2); - - -[numthreads(1,1,1)] -void main() { - Out[0] = max(X[0], Y[0]); - Out[1] = half4(max(X[1].xyz, Y[1].xyz), max(X[1].w, Y[1].w)); - Out[2] = half4(max(X[2].xy, Y[2].xy), max(X[2].zw, Y[2].zw)); - Out[3] = max(half4(1.0, -1.0, 31408, -415), half4(-1.0, 1.0, 1.5, 129.5)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X - Format: Float16 - Stride: 8 - Data: [ 0x3c00, 0xbc00, 0x77ab, 0xde7c, 0x4248, 0x5140, 0xd7b7, 0x068e, 0xc900, 0x4940, 0x7bff, 0x0000 ] - # 1.0, -1.0, 31408, -415, 3.14159, 42, -123.456, 0.0001, -10, 10.5, 65504, 0 - - Name: Y - Format: Float16 - Stride: 8 - Data: [ 0xbc00, 0x3c00, 0x3e00, 0x580c, 0x4170, 0x5140, 0xe11d, 0x0a8e, 0x4900, 0x4940, 0x0000, 0xfbff ] - # -1.0, 1.0, 1.5, 129.5, 2.71828, 42, -654.321, 0.0002, 10, 10.5, 0, -65504 - - Name: Out - 
Format: Float16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut0 - Format: Float16 - Stride: 8 - Data: [ 0x3c00, 0x3c00, 0x77ab, 0x580c, 0x4248, 0x5140, 0xd7b7, 0x0a8e, 0x4900, 0x4940, 0x7bff, 0x0000, 0x3c00, 0x3c00, 0x77ab, 0x580c ] - # 1.0, 1.0, 31408, 129.5, 3.14159, 42, -123.456, 0.0002, 10, 10.5, 65504, 0, 1.0, 1.0, 31408, 129.5 -Results: - - Result: Test0 - Rule: BufferFloatEpsilon - Epsilon: 0 - Actual: Out - Expected: ExpectedOut0 -DescriptorSets: - - Resources: - - Name: X - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/max.fp64.test b/test/Feature/HLSLLib/max.fp64.test deleted file mode 100644 index 1d98525b..00000000 --- a/test/Feature/HLSLLib/max.fp64.test +++ /dev/null @@ -1,73 +0,0 @@ -#--- source.hlsl -StructuredBuffer X : register(t0); -StructuredBuffer Y : register(t1); - -RWStructuredBuffer Out : register(u2); - - -[numthreads(1,1,1)] -void main() { - Out[0] = max(X[0], Y[0]); - Out[1] = double4(max(X[1].xyz, Y[1].xyz), max(X[1].w, Y[1].w)); - Out[2] = double4(max(X[2].xy, Y[2].xy), max(X[2].zw, Y[2].zw)); - Out[3] = max(double4(1.0, -1.0, 31408, -415), double4(-1.0, 1.0, 1.5, 129.5)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X - Format: Float64 - Stride: 32 - Data: [ 1.0, -1.0, 31408, -415, 3.14159, 42, -123.456, 0.0001, -10, 10.5, 1e+308, 0 ] - - Name: Y - Format: Float64 - Stride: 32 - Data: [ -1.0, 1.0, 1.5, 129.5, 2.71828, 42, -654.321, 0.0002, 10, 10.5, 0, -1e+308 ] - - Name: Out - 
Format: Float64 - Stride: 32 - ZeroInitSize: 128 - - Name: ExpectedOut0 - Format: Float64 - Stride: 32 - Data: [ 1.0, 1.0, 31408, 129.5, 3.14159, 42, -123.456, 0.0002, 10, 10.5, 1e+308, 0, 1.0, 1.0, 31408, 129.5 ] -Results: - - Result: Test0 - Rule: BufferFloatEpsilon - Epsilon: 0 - Actual: Out - Expected: ExpectedOut0 -DescriptorSets: - - Resources: - - Name: X - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -#--- end - -# REQUIRES: Double -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/max.int16.test b/test/Feature/HLSLLib/max.int16.test deleted file mode 100644 index c5205845..00000000 --- a/test/Feature/HLSLLib/max.int16.test +++ /dev/null @@ -1,126 +0,0 @@ -#--- source.hlsl -StructuredBuffer X0 : register(t0); -StructuredBuffer Y0 : register(t1); -StructuredBuffer X1 : register(t2); -StructuredBuffer Y1 : register(t3); - -RWStructuredBuffer Out0 : register(u4); -RWStructuredBuffer Out1 : register(u5); - - -[numthreads(1,1,1)] -void main() { - // int16_t - Out0[0] = max(X0[0], Y0[0]); - Out0[1] = int16_t4(max(X0[1].xyz, Y0[1].xyz), max(X0[1].w, Y0[1].w)); - Out0[2] = int16_t4(max(X0[2].xy, Y0[2].xy), max(X0[2].zw, Y0[2].zw)); - Out0[3] = max(int16_t4(-32768, -10, 10, 32767), int16_t4(0, 10, 15, 0)); - - // uint16_t - Out1[0] = max(X1[0], Y1[0]); - Out1[1] = uint16_t4(max(X1[1].xyz, Y1[1].xyz), max(X1[1].w, Y1[1].w)); - Out1[2] = uint16_t4(max(X1[2].xy, Y1[2].xy), max(X1[2].zw, Y1[2].zw)); - Out1[3] = max(uint16_t4(0, 0, 10, 10000), uint16_t4(0, 256, 4, 10001)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: 
X0 - Format: Int16 - Stride: 8 - Data: [ -32768, -10, 0, 0, 10, 32767, 1000, 2500, 1, -1, 512, -2048 ] - - Name: Y0 - Format: Int16 - Stride: 8 - Data: [ 0, 10, -3114, 272, 15, 0, 1500, 2000, -1, 1, 511, -2047 ] - - Name: X1 - Format: UInt16 - Stride: 8 - Data: [ 0, 0, 10, 10000, 32767, 65535, 1000, 2500, 1, 256, 512, 2048 ] - - Name: Y1 - Format: UInt16 - Stride: 8 - Data: [ 0, 256, 4, 10001, 0, 65535, 1500, 2000, 0, 200, 511, 2047 ] - - Name: Out0 - Format: Int16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut0 - Format: Int16 - Stride: 8 - Data: [ 0, 10, 0, 272, 15, 32767, 1500, 2500, 1, 1, 512, -2047, 0, 10, 15, 32767 ] - - Name: Out1 - Format: UInt16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut1 - Format: UInt16 - Stride: 8 - Data: [ 0, 256, 10, 10001, 32767, 65535, 1500, 2500, 1, 256, 512, 2048, 0, 256, 10, 10001 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 -DescriptorSets: - - Resources: - - Name: X0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y0 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: X1 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Y1 - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 -#--- end - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7691 -# XFAIL: DXC-Vulkan - -# REQUIRES: Int16 -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git 
a/test/Feature/HLSLLib/max.int64.test b/test/Feature/HLSLLib/max.int64.test deleted file mode 100644 index 7188c992..00000000 --- a/test/Feature/HLSLLib/max.int64.test +++ /dev/null @@ -1,123 +0,0 @@ -#--- source.hlsl -StructuredBuffer X0 : register(t0); -StructuredBuffer Y0 : register(t1); -StructuredBuffer X1 : register(t2); -StructuredBuffer Y1 : register(t3); - -RWStructuredBuffer Out0 : register(u4); -RWStructuredBuffer Out1 : register(u5); - - -[numthreads(1,1,1)] -void main() { - // int64_t - Out0[0] = max(X0[0], Y0[0]); - Out0[1] = int64_t4(max(X0[1].xyz, Y0[1].xyz), max(X0[1].w, Y0[1].w)); - Out0[2] = int64_t4(max(X0[2].xy, Y0[2].xy), max(X0[2].zw, Y0[2].zw)); - Out0[3] = max(int64_t4(-9223372036854775808, -10, 10, 9223372036854775807), int64_t4(0, 10, 15, 0)); - - // uint64_t - Out1[0] = max(X1[0], Y1[0]); - Out1[1] = uint64_t4(max(X1[1].xyz, Y1[1].xyz), max(X1[1].w, Y1[1].w)); - Out1[2] = uint64_t4(max(X1[2].xy, Y1[2].xy), max(X1[2].zw, Y1[2].zw)); - Out1[3] = max(uint64_t4(0, 0, 10, 10000), uint64_t4(0, 256, 4, 10001)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X0 - Format: Int64 - Stride: 32 - Data: [ -9223372036854775808, -10, 0, 0, 10, 9223372036854775807, 1000, 2500, 1, -1, 512, -2048 ] - - Name: Y0 - Format: Int64 - Stride: 32 - Data: [ 0, 10, -3114, 272, 15, 0, 1500, 2000, -1, 1, 511, -2047 ] - - Name: X1 - Format: UInt64 - Stride: 32 - Data: [ 0, 0, 10, 10000, 9223372036854775807, 18446744073709551615, 1000, 2500, 1, 256, 512, 2048 ] - - Name: Y1 - Format: UInt64 - Stride: 32 - Data: [ 0, 256, 4, 10001, 0, 18446744073709551615, 1500, 2000, 0, 200, 511, 2047 ] - - Name: Out0 - Format: Int64 - Stride: 32 - ZeroInitSize: 128 - - Name: ExpectedOut0 - Format: Int64 - Stride: 32 - Data: [ 0, 10, 0, 272, 15, 9223372036854775807, 1500, 2500, 1, 1, 512, -2047, 0, 10, 15, 9223372036854775807 ] - - Name: Out1 - Format: UInt64 - Stride: 32 - ZeroInitSize: 128 - - Name: ExpectedOut1 
- Format: UInt64 - Stride: 32 - Data: [ 0, 256, 10, 10001, 9223372036854775807, 18446744073709551615, 1500, 2500, 1, 256, 512, 2048, 0, 256, 10, 10001 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 -DescriptorSets: - - Resources: - - Name: X0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y0 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: X1 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Y1 - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 -#--- end - -# REQUIRES: Int64 -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/min.32.test b/test/Feature/HLSLLib/min.32.test deleted file mode 100644 index 45c45ad8..00000000 --- a/test/Feature/HLSLLib/min.32.test +++ /dev/null @@ -1,176 +0,0 @@ -#--- source.hlsl -StructuredBuffer X0 : register(t0); -StructuredBuffer Y0 : register(t1); -StructuredBuffer X1 : register(t2); -StructuredBuffer Y1 : register(t3); -StructuredBuffer X2 : register(t4); -StructuredBuffer Y2 : register(t5); - -RWStructuredBuffer Out0 : register(u6); -RWStructuredBuffer Out1 : register(u7); -RWStructuredBuffer Out2 : register(u8); - - -[numthreads(1,1,1)] -void main() { - // float - Out0[0] = min(X0[0], Y0[0]); - Out0[1] = float4(min(X0[1].xyz, Y0[1].xyz), min(X0[1].w, Y0[1].w)); - Out0[2] = float4(min(X0[2].xy, Y0[2].xy), min(X0[2].zw, Y0[2].zw)); - Out0[3] = 
min(half4(1.0, -1.0, 31408, -415), half4(-1.0, 1.0, 1.5, 129.5)); - - // int - Out1[0] = min(X1[0], Y1[0]); - Out1[1] = int4(min(X1[1].xyz, Y1[1].xyz), min(X1[1].w, Y1[1].w)); - Out1[2] = int4(min(X1[2].xy, Y1[2].xy), min(X1[2].zw, Y1[2].zw)); - Out1[3] = min(X1[3], Y1[3]); - Out1[3] = min(int4(-2147483648, -10, 10, 2147483647), int4(0, 10, 10, 0)); - - // uint - Out2[0] = min(X2[0], Y2[0]); - Out2[1] = uint4(min(X2[1].xyz, Y2[1].xyz), min(X2[1].w, Y2[1].w)); - Out2[2] = uint4(min(X2[2].xy, Y2[2].xy), min(X2[2].zw, Y2[2].zw)); - Out2[3] = min(X2[3], Y2[3]); - Out2[3] = min(uint4(0, 0, 10, 10000), uint4(0, 256, 4, 10001)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X0 - Format: Float32 - Stride: 16 - Data: [ 1.0, -1.0, 31408, -415, 3.14159, 42, -123.456, 0.0001, -10, 10.5, 3e+38, 0 ] - - Name: Y0 - Format: Float32 - Stride: 16 - Data: [ -1.0, 1.0, 1.5, 129.5, 2.71828, 42, -654.321, 0.0002, 10, 10.5, 0, -3e+38 ] - - Name: X1 - Format: Int32 - Stride: 16 - Data: [ -2147483648, -10, 0, 0, 10, 2147483647, 1000, 2500, 1, -1, 512, -2048 ] - - Name: Y1 - Format: Int32 - Stride: 16 - Data: [ 0, 10, -10, 10, 10, 0, 1500, 2000, -1, 1, 511, -2047 ] - - Name: X2 - Format: UInt32 - Stride: 16 - Data: [ 0, 0, 10, 10000, 2147483647, 4294967295, 1000, 2500, 1, 256, 512, 2048 ] - - Name: Y2 - Format: UInt32 - Stride: 16 - Data: [ 0, 256, 4, 10001, 0, 4294967295, 1500, 2000, 0, 200, 511, 2047 ] - - Name: Out0 - Format: Float32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut0 - Format: Float32 - Stride: 16 - Data: [ -1.0, -1.0, 1.5, -415, 2.71828, 42, -654.321, 0.0001, -10, 10.5, 0, -3e+38, -1.0, -1.0, 1.5, -415 ] - - Name: Out1 - Format: Int32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut1 - Format: Int32 - Stride: 16 - Data: [ -2147483648, -10, -10, 0, 10, 0, 1000, 2000, -1, -1, 511, -2048, -2147483648, -10, 10, 0 ] - - Name: Out2 - Format: UInt32 - Stride: 16 - ZeroInitSize: 64 - - Name: 
ExpectedOut2 - Format: UInt32 - Stride: 16 - Data: [ 0, 0, 4, 10000, 0, 4294967295, 1000, 2000, 0, 200, 511, 2047, 0, 0, 4, 10000 ] -Results: - - Result: Test0 - Rule: BufferFloatEpsilon - Epsilon: 0 - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 - - Result: Test2 - Rule: BufferExact - Actual: Out2 - Expected: ExpectedOut2 -DescriptorSets: - - Resources: - - Name: X0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y0 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: X1 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Y1 - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: X2 - Kind: StructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Y2 - Kind: StructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 6 - Space: 0 - VulkanBinding: - Binding: 6 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 7 - Space: 0 - VulkanBinding: - Binding: 7 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 8 - Space: 0 - VulkanBinding: - Binding: 8 -#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/min.fp16.test b/test/Feature/HLSLLib/min.fp16.test deleted file mode 100644 index e792b316..00000000 --- a/test/Feature/HLSLLib/min.fp16.test +++ /dev/null @@ -1,76 +0,0 @@ -#--- source.hlsl -StructuredBuffer X : register(t0); -StructuredBuffer Y : register(t1); - -RWStructuredBuffer Out : register(u2); - - -[numthreads(1,1,1)] -void main() { - Out[0] = min(X[0], Y[0]); - Out[1] = 
half4(min(X[1].xyz, Y[1].xyz), min(X[1].w, Y[1].w)); - Out[2] = half4(min(X[2].xy, Y[2].xy), min(X[2].zw, Y[2].zw)); - Out[3] = min(half4(1.0, -1.0, 31408, -415), half4(-1.0, 1.0, 1.5, 129.5)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X - Format: Float16 - Stride: 8 - Data: [ 0x3c00, 0xbc00, 0x77ab, 0xde7c, 0x4248, 0x5140, 0xd7b7, 0x068e, 0xc900, 0x4940, 0x7bff, 0x0000 ] - # 1.0, -1.0, 31408, -415, 3.14159, 42, -123.456, 0.0001, -10, 10.5, 65504, 0 - - Name: Y - Format: Float16 - Stride: 8 - Data: [ 0xbc00, 0x3c00, 0x3e00, 0x580c, 0x4170, 0x5140, 0xe11d, 0x0a8e, 0x4900, 0x4940, 0x0000, 0xfbff ] - # -1.0, 1.0, 1.5, 129.5, 2.71828, 42, -654.321, 0.0002, 10, 10.5, 0, -65504 - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut - Format: Float16 - Stride: 8 - Data: [ 0xbc00, 0xbc00, 0x3e00, 0xde7c, 0x4170, 0x5140, 0xe11d, 0x068e, 0xc900, 0x4940, 0x0000, 0xfbff, 0xbc00, 0xbc00, 0x3e00, 0xde7c ] - # -1.0, -1.0, 1.5, -415, 2.71828, 42, -654.321, 0.0001, -10, 10.5, 0, -65504, -1.0, -1.0, 1.5, -415 -Results: - - Result: Test0 - Rule: BufferFloatEpsilon - Epsilon: 0 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: X - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -Gis -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/min.fp64.test b/test/Feature/HLSLLib/min.fp64.test deleted file mode 100644 index 2d1a7b53..00000000 --- a/test/Feature/HLSLLib/min.fp64.test +++ /dev/null @@ -1,73 +0,0 @@ -#--- source.hlsl 
-StructuredBuffer X : register(t0); -StructuredBuffer Y : register(t1); - -RWStructuredBuffer Out : register(u2); - - -[numthreads(1,1,1)] -void main() { - Out[0] = min(X[0], Y[0]); - Out[1] = double4(min(X[1].xyz, Y[1].xyz), min(X[1].w, Y[1].w)); - Out[2] = double4(min(X[2].xy, Y[2].xy), min(X[2].zw, Y[2].zw)); - Out[3] = min(double4(1.0, -1.0, 31408, -415), double4(-1.0, 1.0, 1.5, 129.5)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X - Format: Float64 - Stride: 32 - Data: [ 1.0, -1.0, 31408, -415, 3.14159, 42, -123.456, 0.0001, -10, 10.5, 1e+308, 0 ] - - Name: Y - Format: Float64 - Stride: 32 - Data: [ -1.0, 1.0, 1.5, 129.5, 2.71828, 42, -654.321, 0.0002, 10, 10.5, 0, -1e+308 ] - - Name: Out - Format: Float64 - Stride: 32 - ZeroInitSize: 128 - - Name: ExpectedOut0 - Format: Float64 - Stride: 32 - Data: [ -1.0, -1.0, 1.5, -415, 2.71828, 42, -654.321, 0.0001, -10, 10.5, 0, -1e+308, -1.0, -1.0, 1.5, -415 ] -Results: - - Result: Test0 - Rule: BufferFloatEpsilon - Epsilon: 0 - Actual: Out - Expected: ExpectedOut0 -DescriptorSets: - - Resources: - - Name: X - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -#--- end - -# REQUIRES: Double -# RUN: split-file %s %t -# RUN: %dxc_target -Gis -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/min.int16.test b/test/Feature/HLSLLib/min.int16.test deleted file mode 100644 index 7ba9c0b6..00000000 --- a/test/Feature/HLSLLib/min.int16.test +++ /dev/null @@ -1,126 +0,0 @@ -#--- source.hlsl -StructuredBuffer X0 : register(t0); -StructuredBuffer Y0 : register(t1); -StructuredBuffer X1 : register(t2); -StructuredBuffer Y1 : 
register(t3); - -RWStructuredBuffer Out0 : register(u4); -RWStructuredBuffer Out1 : register(u5); - - -[numthreads(1,1,1)] -void main() { - // int16_t - Out0[0] = min(X0[0], Y0[0]); - Out0[1] = int16_t4(min(X0[1].xyz, Y0[1].xyz), min(X0[1].w, Y0[1].w)); - Out0[2] = int16_t4(min(X0[2].xy, Y0[2].xy), min(X0[2].zw, Y0[2].zw)); - Out0[3] = min(int16_t4(-32768, -10, 10, 32767), int16_t4(0, 10, 15, 0)); - - // uint16_t - Out1[0] = min(X1[0], Y1[0]); - Out1[1] = uint16_t4(min(X1[1].xyz, Y1[1].xyz), min(X1[1].w, Y1[1].w)); - Out1[2] = uint16_t4(min(X1[2].xy, Y1[2].xy), min(X1[2].zw, Y1[2].zw)); - Out1[3] = min(uint16_t4(0, 0, 10, 10000), uint16_t4(0, 256, 4, 10001)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X0 - Format: Int16 - Stride: 8 - Data: [-32768, -10, 0, 0, 10, 32767, 1000, 2500, 1, -1, 512, -2048 ] - - Name: Y0 - Format: Int16 - Stride: 8 - Data: [ 0, 10, -3114, 272, 15, 0, 1500, 2000, -1, 1, 511, -2047 ] - - Name: X1 - Format: UInt16 - Stride: 8 - Data: [ 0, 0, 10, 10000, 32767, 65535, 1000, 2500, 1, 256, 512, 2048 ] - - Name: Y1 - Format: UInt16 - Stride: 8 - Data: [ 0, 256, 4, 10001, 0, 65535, 1500, 2000, 0, 200, 511, 2047 ] - - Name: Out0 - Format: Int16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut0 - Format: Int16 - Stride: 8 - Data: [ -32768, -10, -3114, 0, 10, 0, 1000, 2000, -1, -1, 511, -2048, -32768, -10, 10, 0 ] - - Name: Out1 - Format: UInt16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut1 - Format: UInt16 - Stride: 8 - Data: [ 0, 0, 4, 10000, 0, 65535, 1000, 2000, 0, 200, 511, 2047, 0, 0, 4, 10000 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 -DescriptorSets: - - Resources: - - Name: X0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y0 - Kind: StructuredBuffer - DirectXBinding: - 
Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: X1 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Y1 - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 -#--- end - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7691 -# XFAIL: DXC-Vulkan - -# REQUIRES: Int16 -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/min.int64.test b/test/Feature/HLSLLib/min.int64.test deleted file mode 100644 index e8745948..00000000 --- a/test/Feature/HLSLLib/min.int64.test +++ /dev/null @@ -1,123 +0,0 @@ -#--- source.hlsl -StructuredBuffer X0 : register(t0); -StructuredBuffer Y0 : register(t1); -StructuredBuffer X1 : register(t2); -StructuredBuffer Y1 : register(t3); - -RWStructuredBuffer Out0 : register(u4); -RWStructuredBuffer Out1 : register(u5); - - -[numthreads(1,1,1)] -void main() { - // int64_t - Out0[0] = min(X0[0], Y0[0]); - Out0[1] = int64_t4(min(X0[1].xyz, Y0[1].xyz), min(X0[1].w, Y0[1].w)); - Out0[2] = int64_t4(min(X0[2].xy, Y0[2].xy), min(X0[2].zw, Y0[2].zw)); - Out0[3] = min(int64_t4(-9223372036854775808, -10, 10, 9223372036854775807), int64_t4(0, 10, 15, 0)); - - // uint64_t - Out1[0] = min(X1[0], Y1[0]); - Out1[1] = uint64_t4(min(X1[1].xyz, Y1[1].xyz), min(X1[1].w, Y1[1].w)); - Out1[2] = uint64_t4(min(X1[2].xy, Y1[2].xy), min(X1[2].zw, Y1[2].zw)); - Out1[3] = min(uint64_t4(0, 0, 10, 10000), uint64_t4(0, 256, 4, 10001)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X0 - Format: Int64 - Stride: 32 - Data: [ 
-9223372036854775808, -10, 0, 0, 10, 9223372036854775807, 1000, 2500, 1, -1, 512, -2048 ] - - Name: Y0 - Format: Int64 - Stride: 32 - Data: [ 0, 10, -3114, 272, 15, 0, 1500, 2000, -1, 1, 511, -2047 ] - - Name: X1 - Format: UInt64 - Stride: 32 - Data: [ 0, 0, 10, 10000, 9223372036854775807, 18446744073709551615, 1000, 2500, 1, 256, 512, 2048 ] - - Name: Y1 - Format: UInt64 - Stride: 32 - Data: [ 0, 256, 4, 10001, 0, 18446744073709551615, 1500, 2000, 0, 200, 511, 2047 ] - - Name: Out0 - Format: Int64 - Stride: 32 - ZeroInitSize: 128 - - Name: ExpectedOut0 - Format: Int64 - Stride: 32 - Data: [ -9223372036854775808, -10, -3114, 0, 10, 0, 1000, 2000, -1, -1, 511, -2048, -9223372036854775808, -10, 10, 0 ] - - Name: Out1 - Format: UInt64 - Stride: 32 - ZeroInitSize: 128 - - Name: ExpectedOut1 - Format: UInt64 - Stride: 32 - Data: [ 0, 0, 4, 10000, 0, 18446744073709551615, 1000, 2000, 0, 200, 511, 2047, 0, 0, 4, 10000 ] -Results: - - Result: Test0 - Rule: BufferExact - Actual: Out0 - Expected: ExpectedOut0 - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 -DescriptorSets: - - Resources: - - Name: X0 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y0 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: X1 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Y1 - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Out0 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 -#--- end - -# REQUIRES: Int64 -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git 
a/test/Feature/HLSLLib/normalize.16.test b/test/Feature/HLSLLib/normalize.16.test deleted file mode 100644 index 9ee42d9c..00000000 --- a/test/Feature/HLSLLib/normalize.16.test +++ /dev/null @@ -1,63 +0,0 @@ -#--- source.hlsl -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - - -[numthreads(1,1,1)] -void main() { - Out[0] = normalize(In[0]); - Out[1] = half4(normalize(In[1].xyz), normalize(In[1].w)); - Out[2] = half4(normalize(In[2].xy), normalize(In[2].zw)); - Out[3] = normalize(half4(1, 2, -3, 4)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x3c00, 0x4000, 0xc200, 0x4400, 0x0000, 0x44b3, 0xc840, 0x491a, 0x4500, 0x4a00, 0x4200, 0xc400 ] - # 1, 2, -3, 4, 0, 4.7, -8.5, 10.2, 5, 12, 3, -4 - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut - Format: Float16 - Stride: 8 - Data: [ 0x31d8, 0x35d8, 0xb862, 0x39d8, 0, 0x37be, 0xbb00, 0x3c00, 0x3627, 0x3b62, 0x38cd, 0xba66, 0x31d8, 0x35d8, 0xb862, 0x39d8 ] - # 0.18257, 0.36515, -0.54772, 0.73030, 0, 0.48389, -0.87513, 1, 0.38462, 0.92308, 0.6, -0.8, 0.18257, 0.36515, -0.54772, 0.73030 -Results: - - Result: Test0 - Rule: BufferFloatULP - ULPT: 2 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/normalize.32.test b/test/Feature/HLSLLib/normalize.32.test deleted file mode 100644 index 961c7bbd..00000000 --- a/test/Feature/HLSLLib/normalize.32.test +++ /dev/null @@ -1,60 +0,0 @@ -#--- source.hlsl 
-StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - - -[numthreads(1,1,1)] -void main() { - Out[0] = normalize(In[0]); - Out[1] = float4(normalize(In[1].xyz), normalize(In[1].w)); - Out[2] = float4(normalize(In[2].xy), normalize(In[2].zw)); - Out[3] = normalize(float4(1, 2, -3, 4)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ 1, 2, -3, 4, 0, 4.7, -8.5, 10.2, 5, 12, 3, -4 ] - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut - Format: Float32 - Stride: 16 - Data: [ 0.18257, 0.36515, -0.54772, 0.73030, 0, 0.48389, -0.87513, 1, 0.38462, 0.92308, 0.6, -0.8, 0.18257, 0.36515, -0.54772, 0.73030 ] -Results: - - Result: Test0 - Rule: BufferFloatEpsilon - Epsilon: 0.008 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/or.test b/test/Feature/HLSLLib/or.test deleted file mode 100644 index 7b65cd34..00000000 --- a/test/Feature/HLSLLib/or.test +++ /dev/null @@ -1,76 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -StructuredBuffer In2 : register(t1); -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = or(In1[0], In2[0]); - bool4 Tmp = {or(In1[0].xyz, In2[0].xyz), or(In1[0].w, In2[0].w)}; - Out[1] = Tmp; - Out[2].xy = or(In1[0].xy, In2[0].xy); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Bool - Stride: 16 - Data: [1, 0, 1, 0] - - Name: In2 - 
Format: Bool - Stride: 16 - Data: [1, 0, 0, 1] - - Name: Out - Format: Bool - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Bool - Stride: 16 - Data: [1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0] # Last two are filler -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - -# https://github.com/llvm/llvm-project/issues/140824 -# XFAIL: Clang -# https://github.com/microsoft/DirectXShaderCompiler/issues/7475 -# XFAIL: DXC-Vulkan -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/pow.16.test b/test/Feature/HLSLLib/pow.16.test deleted file mode 100644 index 0175fb94..00000000 --- a/test/Feature/HLSLLib/pow.16.test +++ /dev/null @@ -1,79 +0,0 @@ -#--- source.hlsl -StructuredBuffer X : register(t0); -StructuredBuffer Y : register(t1); - -RWStructuredBuffer Out : register(u2); - - -[numthreads(1,1,1)] -void main() { - Out[0] = pow(X[0], Y[0]); - Out[1] = half4(pow(X[1].xyz, Y[1].xyz), pow(X[1].w, Y[1].w)); - Out[2] = half4(pow(X[2].xy, Y[2].xy), pow(X[2].zw, Y[2].zw)); - Out[3] = pow(half4(2, 4, 1.5, 8), half4(3, 0.5, 2, 0.33333333)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X - Format: Float16 - Stride: 8 - Data: [ 0x4000, 0x4400, 0x3e00, 0x4800, 0x4500, 0x4700, 0x4200, 0x0000, 0x0000, 0x4400, 0x4170, 0x4900 ] - # 2, 4, 1.5, 8, 5, 7, 3, 0, 0, 4, 2.718, 10 - - Name: Y - Format: Float16 - Stride: 8 - Data: [ 0x4200, 0x3800, 0x4000, 0x3555, 0x0000, 0x3c00, 
0x4500, 0xbc00, 0x4600, 0xbc00, 0x4000, 0xc500 ] - # 3, 0.5, 2, 0.33333333, 0, 1, 5, -1, 6, -1, 2, -5 - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut - Format: Float16 - Stride: 8 - Data: [ 0x4800, 0x4000, 0x4080, 0x4000, 0x3c00, 0x4700, 0x5b98, 0x7c00, 0x0000, 0x3400, 0x4763, 0x00a8, 0x4800, 0x4000, 0x4080, 0x4000 ] - # 8, 2, 2.25, 2, 1, 7, NaN, inf, 0, 0.25, 7.387524, 0.00001, 8, 2, 2.25, 2 -Results: - - Result: Test - Rule: BufferFloatULP - ULPT: 2 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: X - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -#--- end - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7702 -# XFAIL: DXC-Vulkan - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -Gis -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/pow.32.test b/test/Feature/HLSLLib/pow.32.test deleted file mode 100644 index b38bd6b6..00000000 --- a/test/Feature/HLSLLib/pow.32.test +++ /dev/null @@ -1,75 +0,0 @@ -#--- source.hlsl -StructuredBuffer X : register(t0); -StructuredBuffer Y : register(t1); - -RWStructuredBuffer Out : register(u2); - - -[numthreads(1,1,1)] -void main() { - Out[0] = pow(X[0], Y[0]); - Out[1] = float4(pow(X[1].xyz, Y[1].xyz), pow(X[1].w, Y[1].w)); - Out[2] = float4(pow(X[2].xy, Y[2].xy), pow(X[2].zw, Y[2].zw)); - Out[3] = pow(float4(2, 4, 1.5, 8), float4(3, 0.5, 2, 0.33333333)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: X - Format: Float32 - Stride: 16 - Data: [ 2, 4, 1.5, 8, 5, 7, 3, 0, 0, 4, 2.718, 10 ] - - Name: Y - Format: 
Float32 - Stride: 16 - Data: [ 3, 0.5, 2, 0.33333333, 0, 1, 5, -1, 6, -1, 2, -5 ] - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut - Format: Float32 - Stride: 16 - Data: [ 8, 2, 2.25, 2, 1, 7, 243, inf, 0, 0.25, 7.387524, 0.00001, 8, 2, 2.25, 2 ] -Results: - - Result: Test - Rule: BufferFloatEpsilon - Epsilon: 0.0008 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: X - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Y - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -#--- end - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7702 -# XFAIL: DXC-Vulkan - -# RUN: split-file %s %t -# RUN: %dxc_target -Gis -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/radians.16.test b/test/Feature/HLSLLib/radians.16.test deleted file mode 100644 index 481373c5..00000000 --- a/test/Feature/HLSLLib/radians.16.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = radians(In[0]); - half4 Tmp = {radians(In[1].xyz), radians(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {radians(In[2].xy), radians(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x0001, 0x7c00, 0x59a0, 0xd9a0, 0x5729, 0x5329, 0xd329] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 180, -180, 114.592, 57.2958, -57.2958 - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - 
Data: [ 0x7e00, 0xfc00, 0x8000, 0x8000, 0x0000, 0x0000, 0x7c00, 0x4249, 0xc248, 0x4000, 0x3c00, 0xbc00] - # NaN, -inf, -0, -0, 0, 0, inf, 3.1415926, -3.1415926, 2.00001, 1, -1 -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 2 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/radians.32.test b/test/Feature/HLSLLib/radians.32.test deleted file mode 100644 index c0fb35ee..00000000 --- a/test/Feature/HLSLLib/radians.32.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = radians(In[0]); - float4 Tmp = {radians(In[1].xyz), radians(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {radians(In[2].xy), radians(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, inf, 180, -180, 114.592, 57.2958, -57.2958] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 180, -180, 114.592, 57.2958, -57.2958 - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0, -0, 0, 0, inf, 3.1415926, -3.1415926, 0x1.0000400000000p+1, 0x1.0000060000000p+0, -0x1.0000060000000p+0] - # NaN, NaN, 0, 0, 0, 0, NaN, 3.1415926, -3.1415926, 2.00001, 1, -1 -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 2 - Actual: Out - 
Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/rcp.16.test b/test/Feature/HLSLLib/rcp.16.test deleted file mode 100644 index 6caa0287..00000000 --- a/test/Feature/HLSLLib/rcp.16.test +++ /dev/null @@ -1,71 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = rcp(In[0]); - half4 Tmp = {rcp(In[1].xyz), rcp(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {rcp(In[2].xy), rcp(In[2].zw)}; - Out[2] = Tmp2; - Out[3] = rcp(half4(1, 10, 0.2, -4)); -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x4200, 0x4900, 0x3c00, 0xbc00, 0x4100, 0xb400, 0x4000, 0x3800, 0x4100, 0xb400, 0x4000, 0x3800] - # 3, 10, 1, -1, 2.5, -0.25, 2, 0.5, 2.5, -0.25, 2, 0.5 - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [0x3555, 0x2e66, 0x3c00, 0xbc00, 0x3666, 0xc400, 0x3800, 0x4000, 0x3666, 0xc400, 0x3800, 0x4000, 0x3c00, 0x2e66, 0x4500, 0xb400] - # 0.333, 0.1, 1, -1, 0.4, -4, 0.5, 2, 0.4, -4, 0.5, 2, 1, 0.1, 5, -0.25 -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0.0008 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# https://github.com/llvm/llvm-project/issues/149561 -# XFAIL: Clang-Vulkan && !VK_KHR_shader_float_controls2 - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/rcp.32.test b/test/Feature/HLSLLib/rcp.32.test deleted file mode 100644 index 49fdd7e0..00000000 --- a/test/Feature/HLSLLib/rcp.32.test +++ /dev/null @@ -1,68 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = rcp(In[0]); - float4 Tmp = {rcp(In[1].xyz), rcp(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {rcp(In[2].xy), rcp(In[2].zw)}; - Out[2] = Tmp2; - Out[3] = rcp(float4(1, 10, 0.2, -4)); -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [3, 10, 1, -1, 2.5, -0.25, 2, 0.5, 2.5, -0.25, 2, 0.432] - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [0.333333, 0.1, 1, -1, 0.4, -4 , 0.5, 2, 0.4, -4 , 0.5, 2.31481481481, 1, 0.1, 5, -0.25] -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0.0008 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# https://github.com/llvm/llvm-project/issues/149561 -# XFAIL: Clang-Vulkan && !VK_KHR_shader_float_controls2 - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/rcp.64.test b/test/Feature/HLSLLib/rcp.64.test deleted file mode 100644 index ae9a6770..00000000 --- a/test/Feature/HLSLLib/rcp.64.test +++ /dev/null @@ -1,69 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = rcp(In[0]); - double4 Tmp = {rcp(In[1].xyz), rcp(In[1].w)}; - Out[1] = Tmp; - double4 Tmp2 = {rcp(In[2].xy), rcp(In[2].zw)}; - Out[2] = Tmp2; - Out[3] = rcp(double4(1, 10, 0.2, -4)); -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float64 - Stride: 32 - Data: [ 3, 10, 1, -1, 2.5, -0.25, 2, 0.5, 2.5, -0.25, 2, 0.432] - - Name: Out - Format: Float64 - Stride: 32 - ZeroInitSize: 128 - - Name: ExpectedOut # The result we expect - Format: Float64 - Stride: 32 - Data: [ 0.333333, 0.1, 1, -1, 0.4, -4, 0.5, 2, 0.4, -4, 0.5, 2.31481, 1, 0.1, 5, -0.25 ] -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0.0008 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# https://github.com/llvm/llvm-project/issues/149561 -# XFAIL: Clang-Vulkan && !VK_KHR_shader_float_controls2 - -# REQUIRES: Double -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/reflect.16.test b/test/Feature/HLSLLib/reflect.16.test deleted file mode 100644 index 823bfa0f..00000000 --- a/test/Feature/HLSLLib/reflect.16.test +++ /dev/null @@ -1,194 +0,0 @@ -#--- source.hlsl - -// This test tests three different reflection scenarios -// One in 2D, 3D, and 4D - -StructuredBuffer IncidentRay2D : register(t0); -StructuredBuffer IncidentRay3D : register(t1); -StructuredBuffer IncidentRay4D : register(t2); -StructuredBuffer Wall2D : register(t3); -StructuredBuffer Wall3D : register(t4); -StructuredBuffer Wall4D : register(t5); -RWStructuredBuffer Result2D : register(u6); -RWStructuredBuffer Result3D : register(u7); -RWStructuredBuffer Result4D : register(u8); - -[numthreads(1,1,1)] -void main() { - // 2D case - half2 result2D = reflect(IncidentRay2D[0].xy, normalize(Wall2D[0].xy)); - Result2D[0] = result2D; - half2 result2D_constant = reflect(half2(0.75, -0.5), half2(0.70710677, 0.70710677)); - Result2D[1] = result2D_constant; - - // 3D case, using half4 for alignment - half4 result3D = half4(reflect(IncidentRay3D[0].xyz, normalize(Wall3D[0].xyz)), 0.0); - Result3D[0] = result3D; - half4 result3D_constant = half4(reflect(half3(0.5, -0.25, 0.75), half3(0.5, 0.5, 0.70710677)), 0.0); - Result3D[1] = result3D_constant; - - // 4D case - half4 result4D = reflect(IncidentRay4D[0].xyzw, normalize(Wall4D[0].xyzw)); - Result4D[0] = result4D; - half4 result4D_constant = reflect(half4(0.5, -0.25, 0.75, -0.5), half4(0.5, 0.5, 0.5, 0.5)); - Result4D[1] = result4D_constant; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - - Name: IncidentRay2D - Format: Float16 - Stride: 4 - Data: [ 0x3a00, 0xb800 ] - 
# 0.75, -0.5 - - Name: IncidentRay3D - Format: Float16 - Stride: 8 - Data: [ 0x3800, 0xb400, 0x3a00, 0x0000 ] - # 0.5, -0.25, 0.75, 0.0 - - Name: IncidentRay4D - Format: Float16 - Stride: 8 - Data: [ 0x3800, 0xb400, 0x3a00, 0xb800 ] - # 0.5, -0.25, 0.75, 0.0 - - Name: Wall2D - Format: Float16 - Stride: 4 - Data: [ 0x39a8, 0x39a8 ] - # 0.70703125, 0.70703125 - - Name: Wall3D - Format: Float16 - Stride: 8 - Data: [ 0x3800, 0x3800, 0x39a8, 0x0000 ] - # 0.5, 0.5, 0.70703125, 0.0 - - Name: Wall4D - Format: Float16 - Stride: 8 - Data: [ 0x3800, 0x3800, 0x3800, 0x3800 ] - # 0.5, 0.5, 0.5, 0.5 - - Name: Result2D - Format: Float16 - Stride: 4 - ZeroInitSize: 8 - - Name: ExpectedResult2D - Format: Float16 - Stride: 4 - Data: [ 0x3800, 0xba00, 0x3800, 0xba00 ] - # 0.5, -0.75, 0.5, -0.75 - - Name: Result3D - Format: Float16 - Stride: 8 - ZeroInitSize: 16 - - Name: ExpectedResult3D - Format: Float16 - Stride: 8 - Data: [ 0xb0f8, 0xbb3e, 0xb1a8, 0x0000, 0xb0f8, 0xbb3e, 0xb1a8, 0x0000 ] - # -0.15527344, -0.90527344, -0.17675781, 0.0, -0.15527344, -0.90527344, -0.17675781, 0.0 - - Name: Result4D - Format: Float16 - Stride: 8 - ZeroInitSize: 16 - - Name: ExpectedResult4D - Format: Float16 - Stride: 8 - Data: [ 0x3400, 0xb800, 0x3800, 0xba00, 0x3400, 0xb800, 0x3800, 0xba00 ] - # 0.25, -0.5, 0.5, -0.75, 0.25, -0.5, 0.5, -0.75 -Results: - - Result: CheckResult2D - Rule: BufferFloatULP - ULPT: 1 - Actual: Result2D - Expected: ExpectedResult2D - - Result: CheckResult3D - Rule: BufferFloatULP - ULPT: 1 - Actual: Result3D - Expected: ExpectedResult3D - - Result: CheckResult4D - Rule: BufferFloatULP - ULPT: 1 - Actual: Result4D - Expected: ExpectedResult4D - -DescriptorSets: - - Resources: - - Name: IncidentRay2D - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: IncidentRay3D - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: IncidentRay4D - Kind: StructuredBuffer - 
DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Wall2D - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Wall3D - Kind: StructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Wall4D - Kind: StructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 - - Name: Result2D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 6 - Space: 0 - VulkanBinding: - Binding: 6 - - Name: Result3D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 7 - Space: 0 - VulkanBinding: - Binding: 7 - - Name: Result4D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 8 - Space: 0 - VulkanBinding: - Binding: 8 -... -#--- end -# REQUIRES: Half - -# https://github.com/KhronosGroup/SPIRV-Cross/issues/2524 -# XFAIL: Vulkan-Darwin - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -enable-16bit-types -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/reflect.32.test b/test/Feature/HLSLLib/reflect.32.test deleted file mode 100644 index cc84ac93..00000000 --- a/test/Feature/HLSLLib/reflect.32.test +++ /dev/null @@ -1,187 +0,0 @@ -#--- source.hlsl - -// This test tests three different reflection scenarios -// One in 2D, 3D, and 4D - -StructuredBuffer IncidentRay2D : register(t0); -StructuredBuffer IncidentRay3D : register(t1); -StructuredBuffer IncidentRay4D : register(t2); -StructuredBuffer Wall2D : register(t3); -StructuredBuffer Wall3D : register(t4); -StructuredBuffer Wall4D : register(t5); -RWStructuredBuffer Result2D : register(u6); -RWStructuredBuffer Result3D : register(u7); -RWStructuredBuffer Result4D : register(u8); - -[numthreads(1,1,1)] -void main() { - // 2D case - float2 result2D = reflect(IncidentRay2D[0].xy, normalize(Wall2D[0].xy)); - Result2D[0] = result2D; - float2 result2D_constant = reflect(float2(0.75, -0.5), float2(0.70710677, 0.70710677)); - 
Result2D[1] = result2D_constant; - - // 3D case, using float4 for alignment - float4 result3D = float4(reflect(IncidentRay3D[0].xyz, normalize(Wall3D[0].xyz)), 0.0); - Result3D[0] = result3D; - float4 result3D_constant = float4(reflect(float3(0.5, -0.25, 0.75), float3(0.5, 0.5, 0.70710677)), 0.0); - Result3D[1] = result3D_constant; - - // 4D case - float4 result4D = reflect(IncidentRay4D[0].xyzw, normalize(Wall4D[0].xyzw)); - Result4D[0] = result4D; - float4 result4D_constant = reflect(float4(0.5, -0.25, 0.75, -0.5), float4(0.5, 0.5, 0.5, 0.5)); - Result4D[1] = result4D_constant; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - - Name: IncidentRay2D - Format: Float32 - Stride: 8 - Data: [ 0.75, -0.5 ] - - Name: IncidentRay3D - Format: Float32 - Stride: 16 - Data: [ 0.5, -0.25, 0.75, 0.0 ] - - Name: IncidentRay4D - Format: Float32 - Stride: 16 - Data: [ 0.5, -0.25, 0.75, -0.5 ] - - - Name: Wall2D - Format: Float32 - Stride: 8 - Data: [ 0.70710677, 0.70710677 ] - - Name: Wall3D - Format: Float32 - Stride: 16 - Data: [ 0.5, 0.5, 0.70710677, 0.0 ] - - Name: Wall4D - Format: Float32 - Stride: 16 - Data: [ 0.5, 0.5, 0.5, 0.5 ] - - - Name: Result2D - Format: Float32 - Stride: 8 - ZeroInitSize: 16 - - Name: ExpectedResult2D - Format: Float32 - Stride: 8 - Data: [ 0.5, -0.75, 0.5, -0.75 ] - - - Name: Result3D - Format: Float32 - Stride: 16 - ZeroInitSize: 32 - - Name: ExpectedResult3D - Format: Float32 - Stride: 16 - Data: [ -0.15533, -0.90533, -0.176777, 0.0, -0.15533, -0.90533, -0.176777, 0.0 ] - - - Name: Result4D - Format: Float32 - Stride: 16 - ZeroInitSize: 32 - - Name: ExpectedResult4D - Format: Float32 - Stride: 16 - Data: [ 0.25, -0.5, 0.5, -0.75, 0.25, -0.5, 0.5, -0.75 ] - -Results: - - Result: CheckResult2D - Rule: BufferFloatEpsilon - Epsilon: 0.000002 - Actual: Result2D - Expected: ExpectedResult2D - - Result: CheckResult3D - Rule: BufferFloatEpsilon - Epsilon: 0.000002 - Actual: Result3D - 
Expected: ExpectedResult3D - - Result: CheckResult4D - Rule: BufferFloatEpsilon - Epsilon: 0.000002 - Actual: Result4D - Expected: ExpectedResult4D - -DescriptorSets: - - Resources: - - Name: IncidentRay2D - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: IncidentRay3D - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: IncidentRay4D - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Wall2D - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Wall3D - Kind: StructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Wall4D - Kind: StructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 - - Name: Result2D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 6 - Space: 0 - VulkanBinding: - Binding: 6 - - Name: Result3D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 7 - Space: 0 - VulkanBinding: - Binding: 7 - - Name: Result4D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 8 - Space: 0 - VulkanBinding: - Binding: 8 -... 
-#--- end -# UNSUPPORTED: Clang-Vulkan - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/refract.16.test b/test/Feature/HLSLLib/refract.16.test deleted file mode 100644 index c245170f..00000000 --- a/test/Feature/HLSLLib/refract.16.test +++ /dev/null @@ -1,195 +0,0 @@ -#--- source.hlsl - -// This test tests three different refraction scenarios -// One in 2D, 3D, and 4D - -StructuredBuffer IncidentRay2D : register(t0); -StructuredBuffer IncidentRay3D : register(t1); -StructuredBuffer IncidentRay4D : register(t2); -StructuredBuffer Wall2D : register(t3); -StructuredBuffer Wall3D : register(t4); -StructuredBuffer Wall4D : register(t5); -RWStructuredBuffer Result2D : register(u6); -RWStructuredBuffer Result3D : register(u7); -RWStructuredBuffer Result4D : register(u8); - -[numthreads(1,1,1)] -void main() { - // 2D case - half2 result2D = refract(IncidentRay2D[0], normalize(Wall2D[0]), 0.5); - Result2D[0] = result2D; - half2 result2D_constant = refract(half2(0.75, -0.5), half2(0.70710677, 0.70710677), half(0.5)); - Result2D[1] = result2D_constant; - // the below case is a case of total internal reflection, the 0 vector is expected - Result2D[2] = refract(half2(0.75, -0.5), half2(0.70710677, 0.70710677), 1.3); - - // 3D case, using half4 for alignment - half4 result3D = half4(refract(IncidentRay3D[0].xyz, normalize(Wall3D[0].xyz), 0.5), half(0.0)); - Result3D[0] = result3D; - half4 result3D_constant = half4(refract(half3(0.5, -0.25, 0.75), half3(0.5, 0.5, 0.70710677), 0.5), half(0.0)); - Result3D[1] = result3D_constant; - - // 4D case - half4 result4D = refract(IncidentRay4D[0].xyzw, normalize(Wall4D[0].xyzw), 0.5); - Result4D[0] = result4D; - half4 result4D_constant = refract(half4(0.5, -0.25, 0.75, -0.5), half4(0.5, 0.5, 0.5, 0.5), half(0.5)); - Result4D[1] = result4D_constant; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - 
DispatchSize: [1, 1, 1] -Buffers: - - - Name: IncidentRay2D - Format: Float16 - Stride: 4 - Data: [ 0x3a00, 0xb800 ] - - Name: IncidentRay3D - Format: Float16 - Stride: 8 - Data: [ 0x3800, 0xb400, 0x3a00, 0x0 ] - # 0.5, -0.25, 0.75, 0.0 - - Name: IncidentRay4D - Format: Float16 - Stride: 8 - Data: [ 0x3800, 0xb400, 0x3a00, 0xb800] - # 0.5, -0.25, 0.75, -0.5 - - Name: Wall2D - Format: Float16 - Stride: 4 - Data: [ 0x39a8, 0x39a8 ] - # 0.70703125, 0.70703125 - - Name: Wall3D - Format: Float16 - Stride: 8 - Data: [ 0x3800, 0x3800, 0x39a8, 0x0 ] - # 0.5, 0.5, 0.70703125, 0.0 - - Name: Wall4D - Format: Float16 - Stride: 8 - Data: [ 0x3800, 0x3800, 0x3800, 0x3800 ] - # 0.5, 0.5, 0.5, 0.5 - - Name: Result2D - Format: Float16 - Stride: 4 - ZeroInitSize: 16 - - Name: ExpectedResult2D - Format: Float16 - Stride: 4 - Data: [ 0xb4d9, 0xbb6d, 0xb4d9, 0xbb6d, 0x0, 0x0, 0x0, 0x0 ] - # -0.30297852, -0.92822266, -0.30297852, -0.92822266, 0.0, 0.0, 0.0, 0.0, - - Name: Result3D - Format: Float16 - Stride: 8 - ZeroInitSize: 16 - - Name: ExpectedResult3D - Format: Float16 - Stride: 8 - Data: [ 0xb607, 0xba04, 0xb817, 0x0, 0xb607, 0xba04, 0xb817, 0x0 ] - # -0.37670898, -0.7519531, 0.51123047, 0.0, -0.37670898, -0.7519531, 0.51123047, 0.0 - - Name: Result4D - Format: Float16 - Stride: 8 - ZeroInitSize: 16 - - Name: ExpectedResult4D - Format: Float16 - Stride: 8 - Data: [ 0xb400, 0xb900, 0xb000, 0xba00, 0xb400, 0xb900, 0xb000, 0xba00 ] - # -0.25, -0.625, -0.125, -0.75, -0.25, -0.625, -0.125, -0.75 -Results: - - Result: CheckResult2D - Rule: BufferFloatULP - ULPT: 2 - Actual: Result2D - Expected: ExpectedResult2D - - Result: CheckResult3D - Rule: BufferFloatULP - ULPT: 2 - Actual: Result3D - Expected: ExpectedResult3D - - Result: CheckResult4D - Rule: BufferFloatULP - ULPT: 2 - Actual: Result4D - Expected: ExpectedResult4D - -DescriptorSets: - - Resources: - - Name: IncidentRay2D - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: 
IncidentRay3D - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: IncidentRay4D - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Wall2D - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Wall3D - Kind: StructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Wall4D - Kind: StructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 - - Name: Result2D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 6 - Space: 0 - VulkanBinding: - Binding: 6 - - Name: Result3D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 7 - Space: 0 - VulkanBinding: - Binding: 7 - - Name: Result4D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 8 - Space: 0 - VulkanBinding: - Binding: 8 -... -#--- end -# REQUIRES: Half - -# https://github.com/KhronosGroup/SPIRV-Cross/issues/2524 -# XFAIL: Vulkan-Darwin - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -enable-16bit-types -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/refract.32.test b/test/Feature/HLSLLib/refract.32.test deleted file mode 100644 index 517f2f6a..00000000 --- a/test/Feature/HLSLLib/refract.32.test +++ /dev/null @@ -1,183 +0,0 @@ -#--- source.hlsl - -// This test tests three different refraction scenarios -// One in 2D, 3D, and 4D - -StructuredBuffer IncidentRay2D : register(t0); -StructuredBuffer IncidentRay3D : register(t1); -StructuredBuffer IncidentRay4D : register(t2); -StructuredBuffer Wall2D : register(t3); -StructuredBuffer Wall3D : register(t4); -StructuredBuffer Wall4D : register(t5); -RWStructuredBuffer Result2D : register(u6); -RWStructuredBuffer Result3D : register(u7); -RWStructuredBuffer Result4D : register(u8); - -[numthreads(1,1,1)] -void main() { - // 2D case - float2 result2D = 
refract(IncidentRay2D[0], normalize(Wall2D[0]), 0.5); - Result2D[0] = result2D; - float2 result2D_constant = refract(float2(0.75, -0.5), float2(0.70710677, 0.70710677), 0.5); - Result2D[1] = result2D_constant; - // the below case is a case of total internal reflection, the 0 vector is expected - Result2D[2] = refract(float2(0.75, -0.5), float2(0.70710677, 0.70710677), 1.3); - - // 3D case, using float4 for alignment - float4 result3D = float4(refract(IncidentRay3D[0].xyz, normalize(Wall3D[0].xyz), 0.5), 0.0); - Result3D[0] = result3D; - float4 result3D_constant = float4(refract(float3(0.5, -0.25, 0.75), float3(0.5, 0.5, 0.70710677), 0.5), 0.0); - Result3D[1] = result3D_constant; - - // 4D case - float4 result4D = refract(IncidentRay4D[0].xyzw, normalize(Wall4D[0].xyzw), 0.5); - Result4D[0] = result4D; - float4 result4D_constant = refract(float4(0.5, -0.25, 0.75, -0.5), float4(0.5, 0.5, 0.5, 0.5), 0.5); - Result4D[1] = result4D_constant; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - - Name: IncidentRay2D - Format: Float32 - Stride: 8 - Data: [ 0.75, -0.5 ] - - Name: IncidentRay3D - Format: Float32 - Stride: 16 - Data: [ 0.5, -0.25, 0.75, 0.0 ] - - Name: IncidentRay4D - Format: Float32 - Stride: 16 - Data: [ 0.5, -0.25, 0.75, -0.5 ] - - Name: Wall2D - Format: Float32 - Stride: 8 - Data: [ 0.70710677, 0.70710677 ] - - Name: Wall3D - Format: Float32 - Stride: 16 - Data: [ 0.5, 0.5, 0.70710677, 0.0 ] - - Name: Wall4D - Format: Float32 - Stride: 16 - Data: [ 0.5, 0.5, 0.5, 0.5 ] - - Name: Result2D - Format: Float32 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedResult2D - Format: Float32 - Stride: 8 - Data: [ -0.303054, -0.928054, -0.303054, -0.928054, 0.0, 0.0, 0.0, 0.0 ] - - Name: Result3D - Format: Float32 - Stride: 16 - ZeroInitSize: 32 - - Name: ExpectedResult3D - Format: Float32 - Stride: 16 - Data: [ -0.376802, -0.751802, -0.511432, 0.0, -0.376802, -0.751802, -0.511432, 0.0 ] - - Name: 
Result4D - Format: Float32 - Stride: 16 - ZeroInitSize: 32 - - Name: ExpectedResult4D - Format: Float32 - Stride: 16 - Data: [ -0.25, -0.625, -0.125, -0.75, -0.25, -0.625, -0.125, -0.75 ] -Results: - - Result: CheckResult2D - Rule: BufferFloatULP - ULPT: 40 - Actual: Result2D - Expected: ExpectedResult2D - - Result: CheckResult3D - Rule: BufferFloatULP - ULPT: 40 - Actual: Result3D - Expected: ExpectedResult3D - - Result: CheckResult4D - Rule: BufferFloatULP - ULPT: 40 - Actual: Result4D - Expected: ExpectedResult4D - -DescriptorSets: - - Resources: - - Name: IncidentRay2D - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: IncidentRay3D - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: IncidentRay4D - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Wall2D - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Wall3D - Kind: StructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Wall4D - Kind: StructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 - - Name: Result2D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 6 - Space: 0 - VulkanBinding: - Binding: 6 - - Name: Result3D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 7 - Space: 0 - VulkanBinding: - Binding: 7 - - Name: Result4D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 8 - Space: 0 - VulkanBinding: - Binding: 8 -... 
-#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/reversebits.16.test b/test/Feature/HLSLLib/reversebits.16.test deleted file mode 100644 index 2c6453c4..00000000 --- a/test/Feature/HLSLLib/reversebits.16.test +++ /dev/null @@ -1,69 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = reversebits(In[0]); - uint16_t4 Tmp = {reversebits(In[1].xyz), reversebits(In[1].w)}; - Out[1] = Tmp; - uint16_t4 Tmp2 = {reversebits(In[2].xy), reversebits(uint16_t2(0, 256))}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: UInt16 - Stride: 8 - Data: [ 0, 1, 8, 0x0100, 0x7FFF, 0xFFFF, 0x5555, 0xF0F0, 0x0F88, 0xC003, 0, 0 ] # Last two are filler - - Name: Out - Format: UInt16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: UInt16 - Stride: 8 - Data: [ 0, 32768, 4096, 0x0080, 0xFFFE, 0xFFFF, 0xAAAA, 0x0F0F, 0x11F0, 0xC003, 0, 128 ] -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7680 -# XFAIL: DXC-Vulkan - -# https://github.com/llvm/llvm-project/issues/152049 -# XFAIL: Clang-Vulkan - -# REQUIRES: Int16 -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/reversebits.32.test b/test/Feature/HLSLLib/reversebits.32.test deleted file mode 100644 index 80a42c9d..00000000 --- a/test/Feature/HLSLLib/reversebits.32.test +++ /dev/null @@ -1,65 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = reversebits(In[0]); - uint32_t4 Tmp = {reversebits(In[1].xyz), reversebits(In[1].w)}; - Out[1] = Tmp; - uint32_t4 Tmp2 = {reversebits(In[2].xy), reversebits(uint32_t2(0, 65536))}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: UInt32 - Stride: 16 - Data: [ 0, 1, 8, 0x00010000, 0x7FFFFFFF, 0xFFFFFFFF, 0x55555555, 0xF0F0F0F0, 0x0F880F88, 0xC003C003, 0, 0 ] # Last two are filler - - Name: Out - Format: UInt32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: UInt32 - Stride: 16 - Data: [ 0, 2147483648, 268435456, 0x00008000, 0xFFFFFFFE, 0xFFFFFFFF, 0xAAAAAAAA, 0x0F0F0F0F, 0x11F011F0, 0xC003C003, 0, 32768 ] -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# https://github.com/llvm/llvm-project/issues/152049 -# XFAIL: Clang-Vulkan - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/reversebits.64.test b/test/Feature/HLSLLib/reversebits.64.test deleted file mode 100644 index 272e2593..00000000 --- a/test/Feature/HLSLLib/reversebits.64.test +++ /dev/null @@ -1,69 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = reversebits(In[0]); - uint64_t4 Tmp = {reversebits(In[1].xyz), reversebits(In[1].w)}; - Out[1] = Tmp; - uint64_t4 Tmp2 = {reversebits(In[2].xy), reversebits(uint64_t2(0, 4294967296))}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: UInt64 - Stride: 32 - Data: [ 0, 1, 8, 0x0000000100000000, 0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x5555555555555555, 0xF0F0F0F0F0F0F0F0, 0x0F880F880F880F88, 0xC003C003C003C003, 0, 0 ] # Last two are filler - - Name: Out - Format: UInt64 - Stride: 32 - ZeroInitSize: 96 - - Name: ExpectedOut # The result we expect - Format: UInt64 - Stride: 32 - Data: [ 0, 9223372036854775808, 1152921504606846976, 0x0000000080000000, 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xAAAAAAAAAAAAAAAA, 0x0F0F0F0F0F0F0F0F, 0x11F011F011F011F0, 0xC003C003C003C003, 0, 2147483648 ] -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7680 -# XFAIL: DXC-Vulkan - -# https://github.com/llvm/llvm-project/issues/152049 -# XFAIL: Clang-Vulkan - -# REQUIRES: Int64 -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/round.16.test b/test/Feature/HLSLLib/round.16.test deleted file mode 100644 index 0d89c1d3..00000000 --- a/test/Feature/HLSLLib/round.16.test +++ /dev/null @@ -1,68 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = round(In[0]); - half4 Tmp = {round(In[1].xyz), round(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {round(In[2].xy), round(In[2].zw)}; - Out[2] = Tmp2; - Out[3] = round(In[3]); -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x0001, 0x7c00, 0x4900, 0x4933, 0x4940, 0x494d, 0x49c0, 0xc900, 0xc933, 0xc940, 0xc94d,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 10.0, 10.4, 10.5, 10.6, 11.5, -10.0, -10.4, -10.5, -10.6, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8000, 0x8000, 0x0000, 0x0000, 0x7c00, 0x4900, 0x4900, 0x4900, 0x4980, 0x4a00, 0xc900, 0xc900, 0xc900, 0xc980,] - # NaN, -Inf, -0, -0, 0, 0, Inf, 10.0, 10.0, 10.0, 11.0, 12.0, -10.0, -10.0, -10.0, -11.0, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/round.32.test b/test/Feature/HLSLLib/round.32.test deleted file mode 100644 index c113a84d..00000000 --- a/test/Feature/HLSLLib/round.32.test +++ /dev/null @@ -1,68 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = round(In[0]); - float4 Tmp = {round(In[1].xyz), round(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {round(In[2].xy), round(In[2].zw)}; - Out[2] = Tmp2; - Out[3] = round(In[3]); -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, inf, 10.0, 10.4, 10.5, 10.6, 11.5, -10.0, -10.4, -10.5, -10.6,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 10.0, 10.4, 10.5, 10.6, 11.5, -10.0, -10.4, -10.5, -10.6, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0, -0, 0, 0, inf, 10.0, 10.0, 10.0, 11.0, 12.0, -10.0, -10.0, -10.0, -11.0,] - # NaN, -Inf, -0, -0, 0, 0, Inf, 10.0, 10.0, 10.0, 11.0, 12.0, -10.0, -10.0, -10.0, -11.0, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/rsqrt.16.test b/test/Feature/HLSLLib/rsqrt.16.test deleted file mode 100644 index a17b88a9..00000000 --- a/test/Feature/HLSLLib/rsqrt.16.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = rsqrt(In[0]); - half4 Tmp = {rsqrt(In[1].xyz), rsqrt(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {rsqrt(In[2].xy), rsqrt(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x03FF, 0x7c00, 0xbc00, 0x4c00, 0x5c00, 0x7bff, 0x7e00,] - # NaN, -Inf, -denorm, -0, 0, 0.00006097555, Inf, -1, 16.0, 256.0, 65504, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0x7e00, 0x7e00, 0xfc00, 0x7c00, 0x5801, 0x0000, 0x7e00, 0x3400, 0x2c00, 0x1C00, 0x7e00,] - # NaN, NaN, NaN, -Inf, Inf, 128.125, 0, NaN, 0.25, 0.0625, 0.00390625, -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 1 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/rsqrt.32.test b/test/Feature/HLSLLib/rsqrt.32.test deleted file mode 100644 index 1639ce2b..00000000 --- a/test/Feature/HLSLLib/rsqrt.32.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = rsqrt(In[0]); - float4 Tmp = {rsqrt(In[1].xyz), rsqrt(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {rsqrt(In[2].xy), rsqrt(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, inf, -1, 16.0, 256.0, 65536.0, nan,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, -1, 16.0, 256.0, 65536.0, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, nan, -inf, -inf, inf, inf, 0, nan, 0.25, 0.0625, 0.00390625, nan,] - # NaN, NaN, -Inf, -Inf, Inf, Inf, 0, NaN, 0.25, 0.0625, 0.00390625, -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 1 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/saturate.16.test b/test/Feature/HLSLLib/saturate.16.test deleted file mode 100644 index 5e423231..00000000 --- a/test/Feature/HLSLLib/saturate.16.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = saturate(In[0]); - half4 Tmp = {saturate(In[1].xyz), saturate(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {saturate(In[2].xy), saturate(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x0001, 0x7c00, 0x3c00, 0xbc00, 0x3800, 0x1d1f, 0x3bfe,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1, -1, 0.5, 0.005, 0.999, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x0, 0x0, 0x0, 0x0000, 0x0000, 0x0001, 0x3c00, 0x3c00, 0x0000, 0x3800, 0x1d1f, 0x3bfe,] - # 0, 0, 0, 0, 0, denorm, 1, 1, 0, 0.5, 0.005, 0.999, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/saturate.32.test b/test/Feature/HLSLLib/saturate.32.test deleted file mode 100644 index 20d74c2f..00000000 --- a/test/Feature/HLSLLib/saturate.32.test +++ /dev/null @@ -1,58 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = saturate(In[0]); - Out[1].x = saturate(In[1].x); - Out[1].yzw = saturate(In[1].yzw); - Out[2].xy = saturate(In[0].xy); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ 0, 1.0, -1.0, -2.0, 84.0, -inf, nan, inf ] - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: - Name: In -# CHECK: Format: Float32 - -# CHECK: - Name: Out -# CHECK: Format: Float32 -# CHECK: Data: [ 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0 ] diff --git a/test/Feature/HLSLLib/sign.32.test b/test/Feature/HLSLLib/sign.32.test deleted file mode 100644 index 295f5a93..00000000 --- a/test/Feature/HLSLLib/sign.32.test +++ /dev/null @@ -1,186 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -StructuredBuffer In2 : register(t1); -StructuredBuffer In3 : register(t2); -StructuredBuffer In4 : register(t3); -RWStructuredBuffer Out1 : register(u4); -RWStructuredBuffer Out2 : register(u5); -RWStructuredBuffer Out3 : register(u6); -RWStructuredBuffer Out4 : register(u7); - -[numthreads(1,1,1)] -void main() { - // int - Out1[0] = sign(In1[0]); - int4 Tmp = {sign(In1[0].xyz), sign(In1[0].w)}; - Out1[1] = Tmp; - Out1[2].xy = sign(In1[0].xy); - - // uint - Out2[0] = sign(In2[0]); - int4 Tmp2 = {sign(In2[0].xyz), sign(In2[0].w)}; - Out2[1] = Tmp2; - Out2[2].xy = sign(In2[0].xy); - - // float - Out3[0] = sign(In3[0]); - int4 Tmp3 = {sign(In3[1].xyz), sign(In3[1].w)}; - Out3[1] = Tmp3; - Out3[2].xy = sign(In3[0].xy); - - int X = sign(In4[0]); // testing nan - Out4[0] = (X == 1 || X == 0 || X == -1); - X = sign(In4[1]); // testing -nan - Out4[1] = (X == 1 || X == 0 || X == -1); - X = sign(In4[2]); // testing denorm - Out4[2] = (X == 1 || X == 0); - X = sign(In4[3]); // testing -denorm - Out4[3] = (X == 0 || X == -1); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Int32 - Stride: 16 - Data: [-1, 0, -2147483648, 2147483647] - - Name: In2 - Format: UInt32 - Stride: 16 - Data: [1, 0xffffffff, 0, 10] - - Name: In3 - Format: Float32 - Stride: 16 - Data: [0, -0, -1.3, inf, -inf, -0.5, -0.05, 19] - - Name: In4 - Format: Float32 - 
Stride: 4 - Data: [nan, -nan, 0x1.e7d42cp-127, -0x1.e7d42cp-127] - - Name: Out1 - Format: Int32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut1 # The result we expect - Format: Int32 - Stride: 16 - Data: [-1, 0, -1, 1, -1, 0, -1, 1, -1, 0, 0, 0] # Last two are filler - - Name: Out2 - Format: Int32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut2 # The result we expect - Format: Int32 - Stride: 16 - Data: [1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0] # Last two are filler - - Name: Out3 - Format: Int32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut3 # The result we expect - Format: Int32 - Stride: 16 - Data: [0, 0, -1, 1, -1, -1, -1, 1, 0, 0, 0, 0] - - Name: Out4 - Format: UInt32 - Stride: 4 - ZeroInitSize: 16 - - Name: ExpectedOut4 - Format: UInt32 - Stride: 4 - Data: [1, 1, 1, 1] -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 - - Result: Test2 - Rule: BufferExact - Actual: Out2 - Expected: ExpectedOut2 - - Result: Test3 - Rule: BufferExact - Actual: Out3 - Expected: ExpectedOut3 - - Result: Test4 - Rule: BufferExact - Actual: Out4 - Expected: ExpectedOut4 -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: In3 - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: In4 - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 - - Name: Out3 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 6 - Space: 0 - VulkanBinding: - Binding: 6 - - Name: Out4 - Kind: RWStructuredBuffer - 
DirectXBinding: - Register: 7 - Space: 0 - VulkanBinding: - Binding: 7 -... -#--- end - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7512 -# XFAIL: DXC-Vulkan - -# https://github.com/llvm/llvm-project/issues/149722 -# We're generating invalid SPIRV for this. I have _no_ idea why this isn't -# failing on all Clang Vulkan tests. -# XFAIL: Clang && Vulkan-Darwin - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/sign.fp16.test b/test/Feature/HLSLLib/sign.fp16.test deleted file mode 100644 index 4ce0c34d..00000000 --- a/test/Feature/HLSLLib/sign.fp16.test +++ /dev/null @@ -1,103 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -StructuredBuffer In2 : register(t1); -RWStructuredBuffer Out1 : register(u2); -RWStructuredBuffer Out2 : register(u3); - -[numthreads(1,1,1)] -void main() { - Out1[0] = sign(In1[0]); - int4 Tmp = {sign(In1[1].xyz), sign(In1[1].w)}; - Out1[1] = Tmp; - Out1[2].xy = sign(In1[0].xy); - - int X = sign(In2[0]); // testing nan - Out2[0] = (X == 1 || X == 0 || X == -1); - X = sign(In2[1]); // testing -nan - Out2[1] = (X == 1 || X == 0 || X == -1); - X = sign(In2[2]); // testing denorm - Out2[2] = (X == 1 || X == 0); - X = sign(In2[3]); // testing -denorm - Out2[3] = (X == 0 || X == -1); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Float16 - Stride: 8 - Data: [0x0000, 0x8000, 0xbd33, 0x7c00, 0xfc00, 0xb800, 0xaa66, 0x4cc0] - # 0, -0, -1.3, inf, -inf, -0.5, -0.05, 19 - - Name: In2 # special values where the result might differ based on platform or compiler - Format: Float16 - Stride: 2 - Data: [0x7e00, 0xfe00, 0x0001, 0x8001] # nan, -nan, denorm, -denorm - - Name: Out1 - Format: Int32 - Stride: 16 - ZeroInitSize: 48 - - Name: Out2 - Format: UInt32 - Stride: 4 - ZeroInitSize: 16 - - Name: ExpectedOut1 # The result we expect - 
Format: Int32 - Stride: 16 - Data: [0, 0, -1, 1, -1, -1, -1, 1, 0, 0, 0, 0] - - Name: ExpectedOut2 - Format: UInt32 - Stride: 4 - Data: [1, 1, 1, 1] -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 - - Result: Test2 - Rule: BufferExact - Actual: Out2 - Expected: ExpectedOut2 -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -... -#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/sign.fp64.test b/test/Feature/HLSLLib/sign.fp64.test deleted file mode 100644 index d908b059..00000000 --- a/test/Feature/HLSLLib/sign.fp64.test +++ /dev/null @@ -1,98 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -StructuredBuffer In2 : register(t1); -RWStructuredBuffer Out1 : register(u2); -RWStructuredBuffer Out2 : register(u3); - -[numthreads(1,1,1)] -void main() { - Out1[0] = sign(In1[0]); - int4 Tmp = {sign(In1[1].xyz), sign(In1[1].w)}; - Out1[1] = Tmp; - Out1[2].xy = sign(In1[2].xy); - - int X = sign(In2[0]); // testing nan - Out2[0] = (X == 1 || X == 0 || X == -1); - X = sign(In2[1]); // testing -nan - Out2[1] = (X == 1 || X == 0 || X == -1); - X = sign(In2[2]); // testing denorm - Out2[2] = (X == 1 || X == 0); - X = sign(In2[3]); // testing -denorm - Out2[3] = (X == 0 || X == -1); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Float64 - Stride: 32 - Data: [0, 
-0, -1.3, inf, -inf, -0.5, -0.05, 19] - - Name: In2 - Format: Float64 - Stride: 8 - Data: [nan, -nan, 0x0.fffffffffffffp-1022, -0x0.fffffffffffffp-1022] - - Name: Out1 - Format: Int32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut1 # The result we expect - Format: Int32 - Stride: 16 - Data: [0, 0, -1, 1, -1, -1, -1, 1, 0, 0, 0, 0] - - Name: Out2 - Format: UInt32 - Stride: 4 - ZeroInitSize: 16 - - Name: ExpectedOut2 - Format: UInt32 - Stride: 4 - Data: [1, 1, 1, 1] -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -... 
-#--- end - -# REQUIRES: Double -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/sign.int16.test b/test/Feature/HLSLLib/sign.int16.test deleted file mode 100644 index e63ce1b1..00000000 --- a/test/Feature/HLSLLib/sign.int16.test +++ /dev/null @@ -1,103 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -StructuredBuffer In2 : register(t1); -RWStructuredBuffer Out1 : register(u2); -RWStructuredBuffer Out2 : register(u3); - -[numthreads(1,1,1)] -void main() { - // int16_t - Out1[0] = sign(In1[0]); - int4 Tmp = {sign(In1[0].xyz), sign(In1[0].w)}; - Out1[1] = Tmp; - Out1[2].xy = sign(In1[0].xy); - - // uint16_t - Out2[0] = sign(In2[0]); - int4 Tmp2 = {sign(In2[0].xyz), sign(In2[0].w)}; - Out2[1] = Tmp2; - Out2[2].xy = sign(In2[0].xy); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Int16 - Stride: 8 - Data: [-1, 0, -32768, 32767] - - Name: In2 - Format: UInt16 - Stride: 8 - Data: [1, 65535, 0, 10] - - Name: Out1 - Format: Int32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut1 # The result we expect - Format: Int32 - Stride: 16 - Data: [-1, 0, -1, 1, -1, 0, -1, 1, -1, 0, 0, 0] # Last two are filler - - Name: Out2 - Format: Int32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut2 # The result we expect - Format: Int32 - Stride: 16 - Data: [1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0] # Last two are filler -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 - - Result: Test2 - Rule: BufferExact - Actual: Out2 - Expected: ExpectedOut2 -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out1 - Kind: RWStructuredBuffer - 
DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -... -#--- end - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7512 -# XFAIL: DXC-Vulkan - -# REQUIRES: Int16 -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/sign.int64.test b/test/Feature/HLSLLib/sign.int64.test deleted file mode 100644 index 20bf6de1..00000000 --- a/test/Feature/HLSLLib/sign.int64.test +++ /dev/null @@ -1,103 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In1 : register(t0); -StructuredBuffer In2 : register(t1); -RWStructuredBuffer Out1 : register(u2); -RWStructuredBuffer Out2 : register(u3); - -[numthreads(1,1,1)] -void main() { - // int64_t - Out1[0] = sign(In1[0]); - int4 Tmp = {sign(In1[0].xyz), sign(In1[0].w)}; - Out1[1] = Tmp; - Out1[2].xy = sign(In1[0].xy); - - // uint64_t - Out2[0] = sign(In2[0]); - int4 Tmp2 = {sign(In2[0].xyz), sign(In2[0].w)}; - Out2[1] = Tmp2; - Out2[2].xy = sign(In2[0].xy); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In1 - Format: Int64 - Stride: 32 - Data: [-1, 0, -9223372036854775808, 9223372036854775807] - - Name: In2 - Format: UInt64 - Stride: 32 - Data: [1, 0xFFFFFFFFFFFFFFFF, 0, 10] - - Name: Out1 - Format: Int32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut1 # The result we expect - Format: Int32 - Stride: 16 - Data: [-1, 0, -1, 1, -1, 0, -1, 1, -1, 0, 0, 0] # Last two are filler - - Name: Out2 - Format: Int32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut2 # The result we expect - Format: Int32 - Stride: 16 - Data: [1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0] # Last two are filler -Results: - - Result: Test1 - Rule: BufferExact - Actual: Out1 - Expected: ExpectedOut1 - - Result: Test2 - Rule: BufferExact - 
Actual: Out2 - Expected: ExpectedOut2 -DescriptorSets: - - Resources: - - Name: In1 - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: In2 - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -... -#--- end - -# https://github.com/microsoft/DirectXShaderCompiler/issues/7512 -# XFAIL: DXC-Vulkan - -# REQUIRES: Int64 -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/sin.16.test b/test/Feature/HLSLLib/sin.16.test deleted file mode 100644 index 98babac1..00000000 --- a/test/Feature/HLSLLib/sin.16.test +++ /dev/null @@ -1,69 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = sin(In[0]); - half4 Tmp = {sin(In[1].xyz), sin(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {sin(In[2].xy), sin(In[2].zw)}; - Out[2] = Tmp2; - Out[3] = sin(In[3]); - Out[4] = sin(In[4]); -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x0001, 0x7c00, 0x3906, 0x3d06, 0x3f8a, 0x4106, 0x4248, 0x438a, 0x4466, 0x4506, 0x45a7, 0x4648, 0x7e00, 0x7e00, 0x7e00,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 0.6279297, 1.255859, 1.884766, 2.511719, 3.140625, 3.769531, 4.398438, 5.023438, 5.652344, 6.281250, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 40 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0x7e00, 0x8000, 0x8000, 0x0000, 0x0000, 0x7e00, 0x38b3, 0x3b9b, 
0x3b9c, 0x38b6, 0x13ed, 0xb8b3, 0xbb9c, 0xbb9e, 0xb8b8, 0x97ed, 0x7e00, 0x7e00, 0x7e00,] - # NaN, NaN, -0, -0, 0, 0, NaN, 0.58747065, 0.95081574, 0.95111507, 0.58904284, 0.00096773, -0.58747751, -0.95112079, -0.95201313, -0.58982444, -0.00193545, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0.003 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/sin.32.test b/test/Feature/HLSLLib/sin.32.test deleted file mode 100644 index 56f86c34..00000000 --- a/test/Feature/HLSLLib/sin.32.test +++ /dev/null @@ -1,69 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = sin(In[0]); - float4 Tmp = {sin(In[1].xyz), sin(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {sin(In[2].xy), sin(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, inf, -314.16, 314.16, nan, nan, nan,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, -314.16, 314.16, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, nan, -0, -0, 0, 0, nan, -0.0007346401, 0.0007346401, nan, nan, nan,] - # NaN, NaN, -0, -0, 0, 0, NaN, -0.0007346401, 0.0007346401, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0.0008 - Actual: Out - Expected: ExpectedOut 
-DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... -#--- end - -# https://github.com/KhronosGroup/SPIRV-Cross/issues/2525 -# XFAIL: Vulkan-Darwin - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/sinh.16.test b/test/Feature/HLSLLib/sinh.16.test deleted file mode 100644 index 1cc9a9c6..00000000 --- a/test/Feature/HLSLLib/sinh.16.test +++ /dev/null @@ -1,70 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = sinh(In[0]); - half4 Tmp = {sinh(In[1].xyz), sinh(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {sinh(In[2].xy), sinh(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x0001, 0x7c00, 0x3c00, 0xbc00, 0x7e00, 0x7e00, 0x7e00,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1, -1, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x0000, 0x0000, 0x0000, 0x0000, 0x7c00, 0x3cb3, 0xbcb3, 0x7e00, 0x7e00, 0x7e00,] - # NaN, -Inf, 0.0, 0.0, 0.0, 0.0, Inf, 1.175201, -1.175201, -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 2 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# https://github.com/KhronosGroup/SPIRV-Cross/issues/2507 -# XFAIL: Vulkan-Darwin - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/sinh.32.test b/test/Feature/HLSLLib/sinh.32.test deleted file mode 100644 index 58b86beb..00000000 --- a/test/Feature/HLSLLib/sinh.32.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = sinh(In[0]); - float4 Tmp = {sinh(In[1].xyz), sinh(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {sinh(In[2].xy), sinh(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, inf, 1, -1, nan, nan, nan,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 1, -1, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, -inf, 0.0, 0.0, 0.0, 0.0, inf, 1.175201, -1.175201, nan, nan, nan,] - # NaN, -Inf, 0.0, 0.0, 0.0, 0.0, Inf, 1.175201, -1.175201, -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 4 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/smoothstep.16.test b/test/Feature/HLSLLib/smoothstep.16.test deleted file mode 100644 index 40f35d29..00000000 --- a/test/Feature/HLSLLib/smoothstep.16.test +++ /dev/null @@ -1,235 +0,0 @@ -#--- source.hlsl - -StructuredBuffer Min2D : register(t0); -StructuredBuffer Min3D : register(t1); -StructuredBuffer Min4D : register(t2); -StructuredBuffer Max2D : register(t3); -StructuredBuffer Max3D : register(t4); -StructuredBuffer Max4D : register(t5); -StructuredBuffer X2D : register(t6); -StructuredBuffer X3D : register(t7); -StructuredBuffer X4D : register(t8); -RWStructuredBuffer Result2D : register(u9); -RWStructuredBuffer Result3D : register(u10); -RWStructuredBuffer Result4D : register(u11); - -[numthreads(1,1,1)] -void main() { - // 2D case - half2 result2D = smoothstep(Min2D[0], Max2D[0], X2D[0]); - Result2D[0] = result2D; - half2 result2D_constant = smoothstep(half2(0.75, -0.5), half2(10.5, 24.5), half2(1.0, 23.0)); - Result2D[1] = result2D_constant; - - // we should also test cases where x is below and above the min and max bounds - // below case: - half2 result2D_below_min = smoothstep(Min2D[1], Max2D[1], X2D[1]); - Result2D[2] = result2D_below_min; - // above case: - half2 result2D_above_max = smoothstep(Min2D[2], Max2D[2], X2D[2]); - Result2D[3] = result2D_above_max; - - // 3D case, using half4 for alignment - half4 result3D = half4(smoothstep(Min3D[0], Max3D[0], X3D[0])); - Result3D[0] = result3D; - half4 result3D_constant = half4(smoothstep(half3(0.75, -0.5, 1.0), half3(10.5, 24.5, 3.0), half3(1.0, 23.0, 3.0)), 0.0); - Result3D[1] = result3D_constant; - - // 4D case - half4 result4D = smoothstep(Min4D[0], Max4D[0], X4D[0]); - Result4D[0] = result4D; - half4 result4D_constant = smoothstep(half4(0.75, -0.5, 1.0, -2.0), half4(10.5, 24.5, 3.0, -0.125), half4(1.0, 23.0, 2.5, -0.25)); - Result4D[1] 
= result4D_constant; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - - Name: Min2D - Format: Float16 - Stride: 4 - Data: [ 0x3a00, 0xb800, 0x3a00, 0xb800, 0x3a00, 0xb800, 0x0, 0x0 ] - # 0.75, -0.5, 0.75, -0.5, 0.75, -0.5, 0.0, 0.0 - - Name: Min3D - Format: Float16 - Stride: 8 - Data: [ 0x3a00, 0xb800, 0x3c00, 0x0] - # 0.75, -0.5, 1.0, 0.0 - - Name: Min4D - Format: Float16 - Stride: 8 - Data: [ 0x3a00, 0xb800, 0x3c00, 0xc000] - # 0.75, -0.5, 1.0, -2.0 - - Name: Max2D - Format: Float16 - Stride: 4 - Data: [ 0x4940, 0x4e20, 0x4940, 0x4e20, 0x4940, 0x4e20, 0x0, 0x0 ] - # 10.5, 24.5, 10.5, 24.5, 10.5, 24.5, 0x0, 0x0 - - Name: Max3D - Format: Float16 - Stride: 8 - Data: [ 0x4940, 0x4e20, 0x4200, 0x0 ] - # 10.5, 24.5, 3.0, 0.0 - - Name: Max4D - Format: Float16 - Stride: 8 - Data: [ 0x4940, 0x4e20, 0x4200, 0xb000 ] - # 10.5, 24.5, 3.0, -0.125 - - Name: X2D - Format: Float16 - Stride: 4 - Data: [ 0x3c00, 0x4dc0, 0xc580, 0xc580, 0x5004, 0x5004, 0x0, 0x0 ] - # 1.0, 23.0, -5.5, -5.5, 32.125, 32.125, 0.0, 0.0 - - Name: X3D - Format: Float16 - Stride: 8 - Data: [ 0x3c00, 0x4dc0, 0x4200, 0x0 ] - # 1.0, 23.0, 3.0, 0.0 - - Name: X4D - Format: Float16 - Stride: 8 - Data: [ 0x3c00, 0x4dc0, 0x4100, 0xb400 ] - # 1.0, 23.0, 2.5, -0.25 - - Name: Result2D - Format: Float16 - Stride: 4 - ZeroInitSize: 16 - - Name: ExpectedResult2D - Format: Float16 - Stride: 4 - Data: [ 0x17F0, 0x3BEA, 0x17F0, 0x3BEA, 0x0, 0x0, 0x3C00, 0x3C00 ] - # 0.0019378662, 0.9892578, 0.0019378662, 0.9892578, 0.0, 0.0, 1.0, 1.0 - - Name: Result3D - Format: Float16 - Stride: 8 - ZeroInitSize: 16 - - Name: ExpectedResult3D - Format: Float16 - Stride: 8 - Data: [ 0x17F0, 0x3BEA, 0x3C00, 0x0, 0x17F0, 0x3BEA, 0x3C00, 0x0] - # 0.0019378662, 0.9892578, 1.0, 0.0, 0.0019378662, 0.9892578, 1.0, 0.0 - - Name: Result4D - Format: Float16 - Stride: 8 - ZeroInitSize: 16 - - Name: ExpectedResult4D - Format: Float16 - Stride: 8 - Data: [ 0x17F0, 0x3BEA, 0x3AC0, 
0x3BE6, 0x17F0, 0x3BEA, 0x3AC0, 0x3BE6 ] - # 0.0019378662, 0.9892578, 0.84375, 0.9873047, 0.0019378662, 0.9892578, 0.84375, 0.9873047 -Results: - - Result: CheckResult2D - Rule: BufferFloatULP - ULPT: 2 - Actual: Result2D - Expected: ExpectedResult2D - - Result: CheckResult3D - Rule: BufferFloatULP - ULPT: 2 - Actual: Result3D - Expected: ExpectedResult3D - - Result: CheckResult4D - Rule: BufferFloatULP - ULPT: 2 - Actual: Result4D - Expected: ExpectedResult4D - -DescriptorSets: - - Resources: - - Name: Min2D - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Min3D - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Min4D - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Max2D - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Max3D - Kind: StructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Max4D - Kind: StructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 - - Name: X2D - Kind: StructuredBuffer - DirectXBinding: - Register: 6 - Space: 0 - VulkanBinding: - Binding: 6 - - Name: X3D - Kind: StructuredBuffer - DirectXBinding: - Register: 7 - Space: 0 - VulkanBinding: - Binding: 7 - - Name: X4D - Kind: StructuredBuffer - DirectXBinding: - Register: 8 - Space: 0 - VulkanBinding: - Binding: 8 - - Name: Result2D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 9 - Space: 0 - VulkanBinding: - Binding: 9 - - Name: Result3D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 10 - Space: 0 - VulkanBinding: - Binding: 10 - - Name: Result4D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 11 - Space: 0 - VulkanBinding: - Binding: 11 -... 
-#--- end -# REQUIRES: Half - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -enable-16bit-types -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/smoothstep.32.test b/test/Feature/HLSLLib/smoothstep.32.test deleted file mode 100644 index 0e5340a3..00000000 --- a/test/Feature/HLSLLib/smoothstep.32.test +++ /dev/null @@ -1,223 +0,0 @@ -#--- source.hlsl - -StructuredBuffer Min2D : register(t0); -StructuredBuffer Min3D : register(t1); -StructuredBuffer Min4D : register(t2); -StructuredBuffer Max2D : register(t3); -StructuredBuffer Max3D : register(t4); -StructuredBuffer Max4D : register(t5); -StructuredBuffer X2D : register(t6); -StructuredBuffer X3D : register(t7); -StructuredBuffer X4D : register(t8); -RWStructuredBuffer Result2D : register(u9); -RWStructuredBuffer Result3D : register(u10); -RWStructuredBuffer Result4D : register(u11); - -[numthreads(1,1,1)] -void main() { - // 2D case - float2 result2D = smoothstep(Min2D[0], Max2D[0], X2D[0]); - Result2D[0] = result2D; - float2 result2D_constant = smoothstep(float2(0.75, -0.5), float2(10.5, 24.5), float2(1.0, 23.0)); - Result2D[1] = result2D_constant; - - // we should also test cases where x is below and above the min and max bounds - // below case: - float2 result2D_below_min = smoothstep(Min2D[1], Max2D[1], X2D[1]); - Result2D[2] = result2D_below_min; - // above case: - float2 result2D_above_max = smoothstep(Min2D[2], Max2D[2], X2D[2]); - Result2D[3] = result2D_above_max; - - // 3D case, using float4 for alignment - float4 result3D = float4(smoothstep(Min3D[0], Max3D[0], X3D[0])); - Result3D[0] = result3D; - float4 result3D_constant = float4(smoothstep(float3(0.75, -0.5, 1.0), float3(10.5, 24.5, 3.0), float3(1.0, 23.0, 3.0)), 0.0); - Result3D[1] = result3D_constant; - - // 4D case - float4 result4D = smoothstep(Min4D[0], Max4D[0], X4D[0]); - Result4D[0] = result4D; - float4 result4D_constant = smoothstep(float4(0.75, -0.5, 1.0, -2.0), float4(10.5, 24.5, 
3.0, -0.125), float4(1.0, 23.0, 2.5, -0.25)); - Result4D[1] = result4D_constant; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - - Name: Min2D - Format: Float32 - Stride: 8 - Data: [ 0.75, -0.5, 0.75, -0.5, 0.75, -0.5, 0.0, 0.0 ] - - Name: Min3D - Format: Float32 - Stride: 16 - Data: [ 0.75, -0.5, 1.0, 0.0 ] - - Name: Min4D - Format: Float32 - Stride: 16 - Data: [ 0.75, -0.5, 1.0, -2.0 ] - - Name: Max2D - Format: Float32 - Stride: 8 - Data: [ 10.5, 24.5, 10.5, 24.5, 10.5, 24.5, 0x0, 0x0 ] - - Name: Max3D - Format: Float32 - Stride: 16 - Data: [ 10.5, 24.5, 3.0, 0.0 ] - - Name: Max4D - Format: Float32 - Stride: 16 - Data: [ 10.5, 24.5, 3.0, -0.125 ] - - Name: X2D - Format: Float32 - Stride: 8 - Data: [ 1.0, 23.0, -5.5, -5.5, 32.125, 32.125, 0.0, 0.0 ] - - Name: X3D - Format: Float32 - Stride: 16 - Data: [ 1.0, 23.0, 3.0, 0.0 ] - - Name: X4D - Format: Float32 - Stride: 16 - Data: [ 1.0, 23.0, 2.5, -0.25 ] - - Name: Result2D - Format: Float32 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedResult2D - Format: Float32 - Stride: 8 - Data: [ 0x1.fc35fc0000000p-10, 0x1.fab10c0000000p-1, 0x1.fc35fc0000000p-10, 0x1.fab10c0000000p-1, 0x0.0000000000000p+0, 0x0.0000000000000p+0, 0x1.0000000000000p+0, 0x1.0000000000000p+0 ] - - Name: Result3D - Format: Float32 - Stride: 16 - ZeroInitSize: 32 - - Name: ExpectedResult3D - Format: Float32 - Stride: 16 - Data: [ 0x1.fc35fc0000000p-10, 0x1.fab10c0000000p-1, 0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x1.fc35fc0000000p-10, 0x1.fab10c0000000p-1, 0x1.0000000000000p+0, 0x0.0000000000000p+0 ] - - Name: Result4D - Format: Float32 - Stride: 16 - ZeroInitSize: 32 - - Name: ExpectedResult4D - Format: Float32 - Stride: 16 - Data: [ 0x1.fc35fc0000000p-10, 0x1.fab10c0000000p-1, 0x1.b000000000000p-1, 0x1.f97a0a0000000p-1, 0x1.fc35fc0000000p-10, 0x1.fab10c0000000p-1, 0x1.b000000000000p-1, 0x1.f97a0a0000000p-1 ] -Results: - - Result: CheckResult2D - Rule: BufferFloatULP - ULPT: 2 
- Actual: Result2D - Expected: ExpectedResult2D - - Result: CheckResult3D - Rule: BufferFloatULP - ULPT: 2 - Actual: Result3D - Expected: ExpectedResult3D - - Result: CheckResult4D - Rule: BufferFloatULP - ULPT: 2 - Actual: Result4D - Expected: ExpectedResult4D - -DescriptorSets: - - Resources: - - Name: Min2D - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Min3D - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Min4D - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Max2D - Kind: StructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 - - Name: Max3D - Kind: StructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: Max4D - Kind: StructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 - - Name: X2D - Kind: StructuredBuffer - DirectXBinding: - Register: 6 - Space: 0 - VulkanBinding: - Binding: 6 - - Name: X3D - Kind: StructuredBuffer - DirectXBinding: - Register: 7 - Space: 0 - VulkanBinding: - Binding: 7 - - Name: X4D - Kind: StructuredBuffer - DirectXBinding: - Register: 8 - Space: 0 - VulkanBinding: - Binding: 8 - - Name: Result2D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 9 - Space: 0 - VulkanBinding: - Binding: 9 - - Name: Result3D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 10 - Space: 0 - VulkanBinding: - Binding: 10 - - Name: Result4D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 11 - Space: 0 - VulkanBinding: - Binding: 11 -... 
-#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/sqrt.16.test b/test/Feature/HLSLLib/sqrt.16.test deleted file mode 100644 index 66704c4d..00000000 --- a/test/Feature/HLSLLib/sqrt.16.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = sqrt(In[0]); - half4 Tmp = {sqrt(In[1].xyz), sqrt(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {sqrt(In[2].xy), sqrt(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x03FF, 0x7c00, 0xbc00, 0x4000, 0x4c00, 0x5c00, 0x7e00,] - # NaN, -Inf, -denorm, -0, 0, 0x03FF, Inf, -1, 2, 16.0, 256.0, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0x7e00, 0x7e00, 0x8000, 0x0000, 0x1FFF, 0x7c00, 0x7e00, 0x3da8, 0x4400, 0x4c00, 0x7e00,] - # NaN, NaN, NaN, -0, 0, 0x1FFF, Inf, NaN, 1.41421, 4.0, 16.0, -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 1 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/sqrt.32.test b/test/Feature/HLSLLib/sqrt.32.test deleted file mode 100644 index 2aaeabef..00000000 --- a/test/Feature/HLSLLib/sqrt.32.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = sqrt(In[0]); - float4 Tmp = {sqrt(In[1].xyz), sqrt(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {sqrt(In[2].xy), sqrt(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, inf, -1, 2, 16.0, 256.0, nan,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, -1, 2, 16.0, 256.0, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, nan, -0, -0, 0, 0, inf, nan, 1.41421356237, 4.0, 16.0, nan,] - # NaN, NaN, -0, -0, 0, 0, Inf, NaN, 1.41421356237, 4.0, 16.0, -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 1 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/step.16.test b/test/Feature/HLSLLib/step.16.test deleted file mode 100644 index 6223facf..00000000 --- a/test/Feature/HLSLLib/step.16.test +++ /dev/null @@ -1,75 +0,0 @@ -#--- source.hlsl -StructuredBuffer Y : register(t0); -StructuredBuffer X : register(t1); - -RWStructuredBuffer Out : register(u2); - - -[numthreads(1,1,1)] -void main() { - Out[0] = step(Y[0], X[0]); - Out[1] = half4(step(Y[1].xyz, X[1].xyz), step(Y[1].w, X[1].w)); - Out[2] = half4(step(Y[2].xy, X[2].xy), step(Y[2].zw, X[2].zw)); - Out[3] = step(half4(0.0, 1.0, 2.0, -2.0), half4(0.0, 1.0, 1.999, -1.999)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: Y - Format: Float16 - Stride: 8 - Data: [ 0x0000, 0x3c00, 0x4000, 0xc000, 0x429a, 0x489a, 0x7207, 0xd971, 0xc44d, 0x4248, 0x0011, 0x7bff ] - # 0.0, 1.0, 2.0, -2.0, 3.3, 9.2, 12344, -174.12, -4.3, 3.14159, 0.000001, 65504 - - Name: X - Format: Float16 - Stride: 8 - Data: [ 0x0000, 0x3c00, 0x3fff, 0xbfff, 0x474d, 0x419a, 0x7207, 0xe00a, 0x444d, 0x4170, 0x0022, 0x7bff ] - # 0.0, 1.0, 1.999, -1.999, 7.3, 2.8, 12345, -517.23, 4.3, 2.71828, 0.000002, 65504 - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut - Format: Float16 - Stride: 8 - Data: [ 0x3c00, 0x3c00, 0, 0x3c00, 0x3c00, 0, 0x3c00, 0, 0x3c00, 0, 0x3c00, 0x3c00, 0x3c00, 0x3c00, 0, 0x3c00 ] - # 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1 -Results: - - Result: Test - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: Y - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: X - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: 
- Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/step.32.test b/test/Feature/HLSLLib/step.32.test deleted file mode 100644 index de450c66..00000000 --- a/test/Feature/HLSLLib/step.32.test +++ /dev/null @@ -1,72 +0,0 @@ -#--- source.hlsl -StructuredBuffer Y : register(t0); -StructuredBuffer X : register(t1); - -RWStructuredBuffer Out : register(u2); - - -[numthreads(1,1,1)] -void main() { - Out[0] = step(Y[0], X[0]); - Out[1] = float4(step(Y[1].xyz, X[1].xyz), step(Y[1].w, X[1].w)); - Out[2] = float4(step(Y[2].xy, X[2].xy), step(Y[2].zw, X[2].zw)); - Out[3] = step(float4(0.0, 1.0, 2.0, -2.0), float4(0.0, 1.0, 1.999, -1.999)); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: Y - Format: Float32 - Stride: 16 - Data: [ 0.0, 1.0, 2.0, -2.0, 3.3, 9.2, 12344, -174.12, -4.3, 3.14159, 0.000001, 3e+38 ] - - Name: X - Format: Float32 - Stride: 16 - Data: [ 0.0, 1.0, 1.999, -1.999, 7.3, 2.8, 12345, -517.23, 4.3, 2.71828, 0.000002, 3e+38 ] - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut - Format: Float32 - Stride: 16 - Data: [ 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1 ] -Results: - - Result: Test - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: Y - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: X - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 -#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git 
a/test/Feature/HLSLLib/tanh.16.test b/test/Feature/HLSLLib/tanh.16.test deleted file mode 100644 index 4705a1c3..00000000 --- a/test/Feature/HLSLLib/tanh.16.test +++ /dev/null @@ -1,70 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = tanh(In[0]); - half4 Tmp = {tanh(In[1].xyz), tanh(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {tanh(In[2].xy), tanh(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x8000, 0x0000, 0x3c00, 0xbc00, 0x8000, 0x0000, 0x3c00, 0xbc00, 0x8000, 0x0000, 0x3c00, 0xbc00] - # -0, 0, 1, -1, -0, 0 , 1, -1, -0, 0, 1, -1 - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 24 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x8000, 0x0000, 0x3a18, 0xba18, 0x8000, 0x0000, 0x3a18, 0xba18, 0x8000, 0x0000, 0x3a18, 0xba18] - # -0.0, 0.0, 0.761594, -0.761594, -0.0, 0.0, 0.761594, -0.761594, -0.0, 0.0, 0.761594, -0.761594, -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 5 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# https://github.com/KhronosGroup/SPIRV-Cross/issues/2507 -# XFAIL: Vulkan-Darwin - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/tanh.32.test b/test/Feature/HLSLLib/tanh.32.test deleted file mode 100644 index b803e877..00000000 --- a/test/Feature/HLSLLib/tanh.32.test +++ /dev/null @@ -1,65 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = tanh(In[0]); - float4 Tmp = {tanh(In[1].xyz), tanh(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {tanh(In[2].xy), tanh(In[2].zw)}; - Out[2] = Tmp2; -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ -0, 0, 1, -1, -0, 0, 1, -1, -0, 0, 1, -1] - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 48 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ -0.0, 0.0, 0.761594, -0.761594, -0.0, 0.0, 0.761594, -0.761594, -0.0, 0.0, 0.761594, -0.761594] -Results: - - Result: Test1 - Rule: BufferFloatULP - ULPT: 120 # Metal has an ULP range of 5, CUDA has 2, but NV drivers seem to have wider drift - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Gis -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/trunc.16.test b/test/Feature/HLSLLib/trunc.16.test deleted file mode 100644 index cedc7644..00000000 --- a/test/Feature/HLSLLib/trunc.16.test +++ /dev/null @@ -1,68 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = trunc(In[0]); - half4 Tmp = {trunc(In[1].xyz), trunc(In[1].w)}; - Out[1] = Tmp; - half4 Tmp2 = {trunc(In[2].xy), trunc(In[2].zw)}; - Out[2] = Tmp2; - Out[3] = trunc(In[3]); -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8001, 0x8000, 0x0000, 0x0001, 0x7c00, 0x4900, 0x4933, 0x4940, 0x494d, 0xc900, 0xc933, 0xc940, 0xc94d, 0x7e00,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 10.0, 10.4, 10.5, 10.6, -10.0, -10.4, -10.5, -10.6, - - Name: Out - Format: Float16 - Stride: 8 - ZeroInitSize: 32 - - Name: ExpectedOut # The result we expect - Format: Float16 - Stride: 8 - Data: [ 0x7e00, 0xfc00, 0x8000, 0x8000, 0x0000, 0x0000, 0x7c00, 0x4900, 0x4900, 0x4900, 0x4900, 0xc900, 0xc900, 0xc900, 0xc900, 0x7e00,] - # NaN, -Inf, -0, -0, 0, 0, Inf, 10.0, 10.0, 10.0, 10.0, -10.0, -10.0, -10.0, -10.0, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# REQUIRES: Half -# RUN: split-file %s %t -# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/HLSLLib/trunc.32.test b/test/Feature/HLSLLib/trunc.32.test deleted file mode 100644 index 2673d0bd..00000000 --- a/test/Feature/HLSLLib/trunc.32.test +++ /dev/null @@ -1,68 +0,0 @@ -#--- source.hlsl - -StructuredBuffer In : register(t0); - -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main() { - Out[0] = trunc(In[0]); - float4 Tmp = {trunc(In[1].xyz), trunc(In[1].w)}; - Out[1] = Tmp; - float4 Tmp2 = {trunc(In[2].xy), trunc(In[2].zw)}; - Out[2] = Tmp2; - Out[3] = trunc(In[3]); -} - - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0x1.e7d42cp-127, -0, 0, 0x1.e7d42cp-127, inf, 10.0, 10.4, 10.5, 10.6, -10.0, -10.4, -10.5, -10.6, nan,] - # NaN, -Inf, -denorm, -0, 0, denorm, Inf, 10.0, 10.4, 10.5, 10.6, -10.0, -10.4, -10.5, -10.6, - - Name: Out - Format: Float32 - Stride: 16 - ZeroInitSize: 64 - - Name: ExpectedOut # The result we expect - Format: Float32 - Stride: 16 - Data: [ nan, -inf, -0, -0, 0, 0, inf, 10.0, 10.0, 10.0, 10.0, -10.0, -10.0, -10.0, -10.0, nan,] - # NaN, -Inf, -0, -0, 0, 0, Inf, 10.0, 10.0, 10.0, 10.0, -10.0, -10.0, -10.0, -10.0, -Results: - - Result: Test1 - Rule: BufferFloatEpsilon - Epsilon: 0 - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/ImplicitBindings/all-implicit.test b/test/Feature/ImplicitBindings/all-implicit.test deleted file mode 100644 index 755e980b..00000000 --- a/test/Feature/ImplicitBindings/all-implicit.test +++ /dev/null @@ -1,86 +0,0 @@ -#--- source.hlsl - -RWBuffer A; // dx: u0, vk: 0 -StructuredBuffer B; // dx: t0, vk: 1 - -cbuffer CB { // dx: b0, vk: 2 - int a; -} - -RWStructuredBuffer C; // dx: u1, vk: 3 - -[numthreads(4,2,1)] -void main(uint GI : SV_GroupIndex) { - C[GI].x = 2 * A[GI] + B[GI].x + a; -} - -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [4, 1, 1] -Buffers: - - Name: BufA - Format: Int32 - Data: [ 1, 2, 3, 4, 5, 6, 7, 8] - - Name: BufB - Format: Int32 - Stride: 4 - Data: [ 2, 4, 6, 8, 10, 12, 14, 16] - - Name: BufC - Format: Int32 - Stride: 4 - ZeroInitSize: 32 - - Name: CB - Format: Int32 - Data: [ 100 ] -DescriptorSets: - - Resources: - - Name: BufA - Kind: RWBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: BufB - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: CB - Kind: ConstantBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: BufC - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 3 -... -#--- end - -# Clang's Vulkan implicit binding is not yet implemented. 
-# UNSUPPORTED: Clang-Vulkan - -# CBuffer bindings seem to be broken under metal -# https://github.com/llvm/offload-test-suite/issues/55 -# UNSUPPORTED: Metal - -# RUN: split-file %s %t -# RUN: %if !Vulkan %{ %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl %} -# RUN: %if Vulkan %{ %dxc_target -T cs_6_0 -fspv-target-env=vulkan1.3 -fvk-use-scalar-layout -Fo %t.o %t/source.hlsl %} -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: Name: BufC -# CHECK-NEXT: Format: Int32 -# CHECK-NEXT: Stride: 4 -# CHECK-NEXT: Data: [ 104, 108, 112, 116, 120, 124, 128, 132 ] diff --git a/test/Feature/ImplicitBindings/simple-resources.test b/test/Feature/ImplicitBindings/simple-resources.test deleted file mode 100644 index 0db49506..00000000 --- a/test/Feature/ImplicitBindings/simple-resources.test +++ /dev/null @@ -1,86 +0,0 @@ -#--- source.hlsl - -RWBuffer A : register(u1); -RWBuffer B; // gets u0 -RWBuffer C: register(u2); // unused -RWBuffer D; // gets u2 - -cbuffer CB { // gets b0 - int a; -} - -[numthreads(4,2,1)] -void main(uint GI : SV_GroupIndex) { - D[GI] = 2 * A[GI] + B[GI] + a; -} - -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [4, 1, 1] -Buffers: - - Name: BufA - Format: Int32 - Data: [ 1, 2, 3, 4, 5, 6, 7, 8] - - Name: BufB - Format: Int32 - Data: [ 2, 4, 6, 8, 10, 12, 14, 16] - - Name: BufD - Format: Int32 - Data: [ 0, 0, 0, 0, 0, 0, 0, 0] - - Name: CB - Format: Int32 - Data: [ 100 ] -DescriptorSets: - - Resources: - - Name: BufA - Kind: RWBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: BufB - Kind: RWBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: BufD - Kind: RWBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: CB - Kind: ConstantBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 3 -... -#--- end - -# Clang's Vulkan implicit binding is not yet implemented. 
-# UNSUPPORTED: Clang-Vulkan - -# DXC's vulkan backend doesn't drop unused bindings, so it isn't possible to -# specify descriptor sets that are valid for both DirectX and Vulkan there. -# UNSUPPORTED: DXC-Vulkan - -# CBuffer bindings seem to be broken under metal -# https://github.com/llvm/offload-test-suite/issues/55 -# UNSUPPORTED: Metal - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: Name: BufD -# CHECK-NEXT: Format: Int32 -# CHECK-NEXT: Data: [ 104, 108, 112, 116, 120, 124, 128, 132 ] diff --git a/test/Feature/RawBuffers/ByteAddressBuffers.test b/test/Feature/RawBuffers/ByteAddressBuffers.test deleted file mode 100644 index b4043189..00000000 --- a/test/Feature/RawBuffers/ByteAddressBuffers.test +++ /dev/null @@ -1,109 +0,0 @@ -#--- source.hlsl - -// This test checks that we will get the expected values from invoking -// various `Load*` and `Store` methods on `[RW]ByteAddressBuffer`. - -// The expected behaviour is to load the values in `In1` and `In2` at the given -// byte-offset, add them, and store the result at the respective offset in -// `Out`. We expect each load and store to only access mapped resource data, so -// `CheckAccessFullyMapped` should always return `true = 1`. 
- -ByteAddressBuffer In1 : register(t0); -ByteAddressBuffer In2 : register(t1); -RWByteAddressBuffer Out : register(u0); -RWBuffer MappedBuf : register(u0, space1); - -[numthreads(4,1,1)] -void main() { - uint status; - - uint u1 = In1.Load(0, status); - MappedBuf[0] = CheckAccessFullyMapped(status); - uint v1 = In2.Load(0); - Out.Store(0, u1 + v1); - - uint2 u2 = In1.Load2(16, status); - MappedBuf[1] = CheckAccessFullyMapped(status); - uint2 v2 = In2.Load2(16); - Out.Store2(16, u2 + v2); - - uint3 u3 = In1.Load3(32, status); - MappedBuf[2] = CheckAccessFullyMapped(status); - uint3 v3 = In2.Load3(32); - Out.Store3(32, u3 + v3); - - uint4 u4 = In1.Load4(48, status); - MappedBuf[3] = CheckAccessFullyMapped(status); - uint4 v4 = In2.Load4(48); - Out.Store4(48, u4 + v4); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [4, 1, 1] - -Buffers: - - Name: In1Buf - Format: Int32 - Data: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ] - - - Name: In2Buf - Format: Hex32 - Data: [ 0x100, 0x200, 0x300, 0x400, 0x500, 0x600, 0x700, 0x800, - 0x900, 0xA00, 0xB00, 0xC00, 0xD00, 0xE00, 0xF00, 0x1000 ] - - - Name: OutBuf - Format: Hex32 - ZeroInitSize: 64 - - - Name: MappedBuf - Format: Int32 - ZeroInitSize: 16 - -DescriptorSets: - - Resources: - - Name: In1Buf - Kind: ByteAddressBuffer - DirectXBinding: - Register: 0 - Space: 0 - - - Name: In2Buf - Kind: ByteAddressBuffer - DirectXBinding: - Register: 1 - Space: 0 - - - Name: OutBuf - Kind: RWByteAddressBuffer - DirectXBinding: - Register: 0 - Space: 0 - - - Name: MappedBuf - Kind: RWBuffer - DirectXBinding: - Register: 0 - Space: 1 -... 
-#--- end - -# UNSUPPORTED: Clang -# UNSUPPORTED: Vulkan, Metal - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: - Name: OutBuf -# CHECK-NEXT: Format: Hex32 -# CHECK-NEXT: Data: [ 0x101, 0x0, 0x0, 0x0, 0x505, 0x606, 0x0, 0x0, 0x909, -# CHECK-NEXT: 0xA0A, 0xB0B, 0x0, 0xD0D, 0xE0E, 0xF0F, 0x1010 ] - -# CHECK: - Name: MappedBuf -# CHECK-NEXT: Format: Int32 -# CHECK-NEXT: Data: [ 1, 1, 1, 1 ] diff --git a/test/Feature/RawBuffers/GetDimensions-compute.test b/test/Feature/RawBuffers/GetDimensions-compute.test deleted file mode 100644 index ed38440b..00000000 --- a/test/Feature/RawBuffers/GetDimensions-compute.test +++ /dev/null @@ -1,121 +0,0 @@ -#--- source.hlsl - -// This test checks that we will get the expected values from invoking -// `GetDimension`s on various structured buffer types. We are checking -// that `stride` denotes the total number of bytes of a single struct, -// and `numStructs` denote how many of the structs are in the buffer. 
- -struct R { - int4 a; -}; - -struct S { - int2 a; -}; - -StructuredBuffer A : register(t0); -RWStructuredBuffer B : register(u0); -AppendStructuredBuffer C : register(u1); -ConsumeStructuredBuffer D : register(u2); - -RWBuffer Out : register(u0, space1); - -[numthreads(4,1,1)] -void main() { - uint numStructs, stride; - int i = 0; - - A.GetDimensions(numStructs, stride); - Out[i++] = numStructs; - Out[i++] = stride; - - B.GetDimensions(numStructs, stride); - Out[i++] = numStructs; - Out[i++] = stride; - - C.GetDimensions(numStructs, stride); - Out[i++] = numStructs; - Out[i++] = stride; - - D.GetDimensions(numStructs, stride); - Out[i++] = numStructs; - Out[i++] = stride; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [4, 1, 1] - -Buffers: - - Name: A - Format: Int32 - Stride: 16 - Data: [ 0,1,2,3 ] - - - Name: B - Format: Int32 - Stride: 8 - Data: [ 0,1,2,3,4,5,6,7 ] - - - Name: C - Format: Int32 - Stride: 16 - Data: [ 0,1,2,3,4,5,6,7 ] - - - Name: D - Format: Int32 - Stride: 8 - Data: [ 0,1,2,3 ] - - - Name: Out - Format: Int32 - ZeroInitSize: 32 - -DescriptorSets: - - Resources: - - Name: A - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - - - Name: B - Kind: RWStructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - - - Name: C - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - - - Name: D - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - - - Name: Out - Kind: RWBuffer - DirectXBinding: - Register: 0 - Space: 1 -... 
-#--- end - -# UNSUPPORTED: Clang -# UNSUPPORTED: Vulkan - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: - Name: Out -# CHECK-NEXT: Format: Int32 -# CHECK-NEXT: Data: [ 1, 16, 4, 8, 2, 16, 2, 8 ] diff --git a/test/Feature/RootSignatures/Defaults.test b/test/Feature/RootSignatures/Defaults.test deleted file mode 100644 index b33d16f1..00000000 --- a/test/Feature/RootSignatures/Defaults.test +++ /dev/null @@ -1,142 +0,0 @@ -#--- source.hlsl - -cbuffer RootConstants : register(b0) { - float4 C; -}; - -struct Input { - float4 A; - float4 B; -}; - -struct Output { - float4 A; -}; - -StructuredBuffer In : register(t0); -RWStructuredBuffer Out1 : register(u1); -RWStructuredBuffer Out2 : register(u2); - -// Root signature to sanity test the default values that are given to optional -// parameters (`space = 0`, `offset = DESCRIPTOR_RANGE_OFFSET_APPEND`, etc) - -#define RootSig \ - "RootConstants(num32BitConstants = 4, b0), " \ - "DescriptorTable( " \ - " SRV(t0), " \ - " UAV(u1) " \ - "), " \ - "UAV(u2) " - -[RootSignature(RootSig)] -[numthreads(1,1,1)] -void main(uint GI : SV_GroupIndex) { - Out1[GI].A = In[GI].A * In[GI].B * C; - Out2[GI].A = In[GI].A * In[GI].B * C * 2; -} - -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -RuntimeSettings: - DirectX: - RootParameters: - - Kind: Constant - Name: Root - - Kind: DescriptorTable - - Kind: RootDescriptor - Resource: - Name: Out2 - Kind: RWStructuredBuffer -Buffers: - - Name: Root - Format: Float32 - Data: [ 2, 4, 6, 8 ] - - Name: In - Format: Float32 - Stride: 32 - Data: [ 2, 4, 6, 8, 10, 12, 14, 16] - - Name: Out1 - Format: Float32 - Channels: 4 - ZeroInitSize: 16 - - Name: Out2 - Format: Float32 - Channels: 4 - ZeroInitSize: 16 -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - - Name: Out1 - Kind: 
RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 -... -#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s -# RUN: obj2yaml %t.o | FileCheck %s --check-prefix=OBJ - -# CHECK-LABEL: Name: Out1 -# CHECK: Data: [ 40, 192, 504, 1024 ] -# CHECK-LABEL: Name: Out2 -# CHECK: Data: [ 80, 384, 1008, 2048 ] - -## Root Signature Header -# OBJ: - Name: RTS0 -# OBJ-NEXT: Size: 140 -# OBJ-NEXT: RootSignature: -# OBJ-NEXT: Version: 2 -# OBJ-NEXT: NumRootParameters: 3 -# OBJ-NEXT: RootParametersOffset: 24 -# OBJ-NEXT: NumStaticSamplers: 0 -# OBJ-NEXT: StaticSamplersOffset: 140 - -# OBJ-NEXT: Parameters: - -## RootConstants(num32BitConstants = 4, b0) -# OBJ-NEXT: - ParameterType: 1 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Constants: -# OBJ-NEXT: Num32BitValues: 4 -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: ShaderRegister: 0 - -## DescriptorTable -# OBJ: - ParameterType: 0 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Table: -# OBJ-NEXT: NumRanges: 2 -# OBJ-NEXT: RangesOffset: 80 -# OBJ-NEXT: Ranges: - -## SRV(t0) -# OBJ-NEXT: - RangeType: 0 -# OBJ-NEXT: NumDescriptors: 1 -# OBJ-NEXT: BaseShaderRegister: 0 -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 - -## UAV(u1) -# OBJ: - RangeType: 1 -# OBJ-NEXT: NumDescriptors: 1 -# OBJ-NEXT: BaseShaderRegister: 1 -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 - -## UAV(u2) -# OBJ: - ParameterType: 4 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Descriptor: -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: ShaderRegister: 2 diff --git a/test/Feature/RootSignatures/DescriptorTables.test b/test/Feature/RootSignatures/DescriptorTables.test deleted file mode 100644 index af010c2d..00000000 --- a/test/Feature/RootSignatures/DescriptorTables.test +++ /dev/null @@ -1,97 +0,0 @@ -#--- source.hlsl - -struct Input { - float4 A; - float4 B; -}; - -StructuredBuffer In : 
register(t2, space0); -RWBuffer Out1 : register(u1, space4); -RWBuffer Out2 : register(u2, space4); - -[RootSignature("DescriptorTable(SRV(t2), UAV(u1, space=4), UAV(u2, space=4, numdescriptors=1))")] -[numthreads(1,1,1)] -void main(uint GI : SV_GroupIndex) { - Out1[GI] = In[GI].A * In[GI].B; - Out2[GI] = In[GI].A * In[GI].B * 2; -} -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 32 - Data: [ 2, 4, 6, 8, 10, 12, 14, 16] - - Name: Out1 - Format: Float32 - Channels: 4 - ZeroInitSize: 16 - - Name: Out2 - Format: Float32 - Channels: 4 - ZeroInitSize: 16 -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - - Name: Out1 - Kind: RWBuffer - DirectXBinding: - Register: 1 - Space: 4 - - Name: Out2 - Kind: RWBuffer - DirectXBinding: - Register: 2 - Space: 4 -... -#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s -# RUN: obj2yaml %t.o | FileCheck %s --check-prefix=OBJ - -# CHECK: Data: -# CHECK-LABEL: Name: Out1 -# CHECK: Data: [ 20, 48, 84, 128 ] -# CHECK-LABEL: Name: Out2 -# CHECK: Data: [ 40, 96, 168, 256 ] - -# OBJ: - Name: RTS0 -# OBJ-NEXT: Size: 116 -# OBJ-NEXT: RootSignature: -# OBJ-NEXT: Version: 2 -# OBJ-NEXT: NumRootParameters: 1 -# OBJ-NEXT: RootParametersOffset: 24 -# OBJ-NEXT: NumStaticSamplers: 0 -# OBJ-NEXT: StaticSamplersOffset: 116 -# OBJ-NEXT: Parameters: -# OBJ-NEXT: - ParameterType: 0 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Table: -# OBJ-NEXT: NumRanges: 3 -# OBJ-NEXT: RangesOffset: 44 -# OBJ-NEXT: Ranges: -# OBJ-NEXT: - RangeType: 0 -# OBJ-NEXT: NumDescriptors: 1 -# OBJ-NEXT: BaseShaderRegister: 2 -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 -# OBJ: - RangeType: 1 -# OBJ-NEXT: NumDescriptors: 1 -# OBJ-NEXT: BaseShaderRegister: 1 -# OBJ-NEXT: RegisterSpace: 
4 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 -# OBJ: - RangeType: 1 -# OBJ-NEXT: NumDescriptors: 1 -# OBJ-NEXT: BaseShaderRegister: 2 -# OBJ-NEXT: RegisterSpace: 4 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 diff --git a/test/Feature/RootSignatures/Flags.test b/test/Feature/RootSignatures/Flags.test deleted file mode 100644 index 8e0cec9b..00000000 --- a/test/Feature/RootSignatures/Flags.test +++ /dev/null @@ -1,169 +0,0 @@ -#--- source.hlsl - -struct Input { - float4 A; - float4 B; -}; - -struct Output { - float4 A; -}; - -StructuredBuffer In : register(t0); -StructuredBuffer InExtra : register(t1); -RWStructuredBuffer Out1 : register(u1); -RWStructuredBuffer Out2 : register(u2); - -// Root signature to test specifying various flags: -// - Edge-case value of '0' -// - Demonstrate setting of non-sampler root flags -// - Demonstrate setting of all descriptor flags -// - Demonstrate '|' of applicable flags - -#define RootSig \ - "RootFlags( " \ - " ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT | " \ - " CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | " \ - " ALLOW_STREAM_OUTPUT " \ - "), " \ - "DescriptorTable( " \ - " SRV(t0, flags = DATA_STATIC), " \ - " SRV(t1, flags = DATA_STATIC_WHILE_SET_AT_EXECUTE), " \ - " UAV(u1, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE), " \ - " UAV(u2, flags = 0)" \ - ")" - -[RootSignature(RootSig)] -[numthreads(1,1,1)] -void main(uint GI : SV_GroupIndex) { - Out1[GI].A = In[GI].A * InExtra[GI].B; - Out2[GI].A = In[GI].A * InExtra[GI].B * 2; -} - -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -RuntimeSettings: - DirectX: - RootParameters: - - Kind: DescriptorTable -Buffers: - - Name: In - Format: Float32 - Stride: 32 - Data: [ 2, 4, 6, 8, 10, 12, 14, 16] - - Name: InExtra - Format: Float32 - Stride: 32 - Data: [ 2, 4, 6, 8, 10, 12, 14, 16] - - Name: Out1 - Format: Float32 - Channels: 4 - ZeroInitSize: 16 - - Name: Out2 - Format: Float32 - Channels: 4 - ZeroInitSize: 16 
-DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - - Name: InExtra - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 -... -#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s -# RUN: obj2yaml %t.o | FileCheck %s --check-prefix=OBJ - -# CHECK-LABEL: Name: Out1 -# CHECK: Data: [ 20, 48, 84, 128 ] -# CHECK-LABEL: Name: Out2 -# CHECK: Data: [ 40, 96, 168, 256 ] - -## Root Signature Header -# OBJ: - Name: RTS0 -# OBJ-NEXT: Size: 140 -# OBJ-NEXT: RootSignature: -# OBJ-NEXT: Version: 2 -# OBJ-NEXT: NumRootParameters: 1 -# OBJ-NEXT: RootParametersOffset: 24 -# OBJ-NEXT: NumStaticSamplers: 0 -# OBJ-NEXT: StaticSamplersOffset: 140 - -# OBJ-NEXT: Parameters: - -## Descriptor Table -# OBJ-NEXT: - ParameterType: 0 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Table: -# OBJ-NEXT: NumRanges: 4 -# OBJ-NEXT: RangesOffset: 44 -# OBJ-NEXT: Ranges: - -## SRV(t0, flags = DATA_STATIC) -# OBJ-NEXT: - RangeType: 0 -# OBJ-NEXT: NumDescriptors: 1 -# OBJ-NEXT: BaseShaderRegister: 0 -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 -## Only data flag set as expected: -# OBJ-NEXT: DATA_STATIC: true - -## SRV(t1, flags = DATA_STATIC_WHILE_SET_AT_EXECUTE) -# OBJ-NEXT: - RangeType: 0 -# OBJ-NEXT: NumDescriptors: 1 -# OBJ-NEXT: BaseShaderRegister: 1 -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 -## Only data flag set as expected: -# OBJ-NEXT: DATA_STATIC_WHILE_SET_AT_EXECUTE: true - -## UAV(u1, flags = DESCRIPTORS_VOLATILE | DATA_VOLATILE) -# OBJ-NEXT: - RangeType: 1 -# OBJ-NEXT: NumDescriptors: 1 -# OBJ-NEXT: BaseShaderRegister: 1 -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: 
OffsetInDescriptorsFromTableStart: 4294967295 -## Both flags set as expected: -# OBJ-NEXT: DESCRIPTORS_VOLATILE: true -# OBJ-NEXT: DATA_VOLATILE: true - -## UAV(u2, flags = 0) -# OBJ-NEXT: - RangeType: 1 -# OBJ-NEXT: NumDescriptors: 1 -# OBJ-NEXT: BaseShaderRegister: 2 -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 -## No flags set as expected (verified using OBJ-NEXT below): - -## RootFlags( -## ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT | -## CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | -## ALLOW_STREAM_OUTPUT -## ) -## RootFlags set as expected: -# OBJ-NEXT: AllowInputAssemblerInputLayout: true -# OBJ-NEXT: AllowStreamOutput: true -# OBJ-NEXT: CBVSRVUAVHeapDirectlyIndexed: true diff --git a/test/Feature/RootSignatures/ManualDescriptors.test b/test/Feature/RootSignatures/ManualDescriptors.test deleted file mode 100644 index 2bcab024..00000000 --- a/test/Feature/RootSignatures/ManualDescriptors.test +++ /dev/null @@ -1,149 +0,0 @@ -#--- source.hlsl - -struct Input { - float4 A; - float4 B; -}; - -struct Output { - float4 A; -}; - -StructuredBuffer In : register(t0); -StructuredBuffer InExtra : register(t1); -RWStructuredBuffer Out1 : register(u1); -RWStructuredBuffer Out2 : register(u2); - -// Root signature to test manual `offset` and `numDescriptor` specification: -// - Demonstrate manually describing the offsets and numDescriptors -// - Edge-case value of `offset` = `DESCRIPTOR_RANGE_OFFSET_APPEND` -// - Edge-case value of `numDescriptors` = `unbounded` - -#define RootSig \ - "DescriptorTable( " \ - " UAV(u2, offset = 3, numDescriptors = unbounded), " \ - " SRV(t0, offset = 0, numDescriptors = 2), " \ - " UAV(u1, " \ - " offset = DESCRIPTOR_RANGE_OFFSET_APPEND, " \ - " numDescriptors = 1 " \ - " ) " \ - ")" - -[RootSignature(RootSig)] -[numthreads(1,1,1)] -void main(uint GI : SV_GroupIndex) { - Out1[GI].A = In[GI].A * InExtra[GI].B; - Out2[GI].A = In[GI].A * InExtra[GI].B * 2; -} - -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute 
- Entry: main - DispatchSize: [1, 1, 1] -RuntimeSettings: - DirectX: - RootParameters: - - Kind: DescriptorTable -Buffers: - - Name: In - Format: Float32 - Stride: 32 - Data: [ 2, 4, 6, 8, 10, 12, 14, 16] - - Name: InExtra - Format: Float32 - Stride: 32 - Data: [ 2, 4, 6, 8, 10, 12, 14, 16] - - Name: Out1 - Format: Float32 - Channels: 4 - ZeroInitSize: 16 - - Name: Out2 - Format: Float32 - Channels: 4 - ZeroInitSize: 16 -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - - Name: InExtra - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - - Name: Out2 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 -... -#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s -# RUN: obj2yaml %t.o | FileCheck %s --check-prefix=OBJ - -# CHECK-LABEL: Name: Out1 -# CHECK: Data: [ 20, 48, 84, 128 ] -# CHECK-LABEL: Name: Out2 -# CHECK: Data: [ 40, 96, 168, 256 ] - -## Root Signature Header -# OBJ: - Name: RTS0 -# OBJ-NEXT: Size: 116 -# OBJ-NEXT: RootSignature: -# OBJ-NEXT: Version: 2 -# OBJ-NEXT: NumRootParameters: 1 -# OBJ-NEXT: RootParametersOffset: 24 -# OBJ-NEXT: NumStaticSamplers: 0 -# OBJ-NEXT: StaticSamplersOffset: 116 -# OBJ-NEXT: Parameters: - -## DescriptorTable -# OBJ-NEXT: - ParameterType: 0 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Table: -# OBJ-NEXT: NumRanges: 3 -# OBJ-NEXT: RangesOffset: 44 -# OBJ-NEXT: Ranges: - -## UAV(u2, offset = 3, numDescriptors = unbounded) -# OBJ: - RangeType: 1 -## Ensure unbounded descriptors -# OBJ-NEXT: NumDescriptors: -1 -# OBJ-NEXT: BaseShaderRegister: 2 -# OBJ-NEXT: RegisterSpace: 0 -## Ensure offset = 3 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 3 - -## SRV(t0, offset = 0, numDescriptors = 2) -# OBJ: - RangeType: 0 -## Ensure 2 descriptors -# OBJ-NEXT: 
NumDescriptors: 2 -# OBJ-NEXT: BaseShaderRegister: 0 -# OBJ-NEXT: RegisterSpace: 0 -## Ensure offset = 0 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 0 - -## UAV(u1, -## offset = DESCRIPTOR_RANGE_OFFSET_APPEND, -## numDescriptors = 1 -## ) -# OBJ: - RangeType: 1 -## Ensure 1 descriptor -# OBJ-NEXT: NumDescriptors: 1 -# OBJ-NEXT: BaseShaderRegister: 1 -# OBJ-NEXT: RegisterSpace: 0 - -## Ensure append -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 diff --git a/test/Feature/RootSignatures/NumberParameters.test b/test/Feature/RootSignatures/NumberParameters.test deleted file mode 100644 index b82ad753..00000000 --- a/test/Feature/RootSignatures/NumberParameters.test +++ /dev/null @@ -1,150 +0,0 @@ -#--- source.hlsl - -cbuffer RootConstants : register(b0) { - float4 C; -}; - -struct Input { - float4 A; - float4 B; -}; - -struct Output { - float4 A; -}; - -StructuredBuffer In : register(t4294967294); -RWStructuredBuffer Out1 : register(u1, space4294967279); -RWStructuredBuffer Out2 : register(u2); - -// Root signature to test edge-cases of specify number arguments: -// - Maximum valid register value (0xfffffffe = 4294967294) -// - Maximum valid register space value (0xffffffef = 4294967279) -// - Maximum valid num32BitConstants value -// (61 = 64 - # of used DWORDS for other params) -// - Using (un)signed integer parameter values - - -#define RootSig \ - "RootConstants(num32BitConstants = +61, b0), " \ - "DescriptorTable( " \ - " SRV(t4294967294), " \ - " UAV(u1, space = 4294967279) " \ - "), " \ - "UAV(u2) " - -[RootSignature(RootSig)] -[numthreads(1,1,1)] -void main(uint GI : SV_GroupIndex) { - Out1[GI].A = In[GI].A * In[GI].B * C; - Out2[GI].A = In[GI].A * In[GI].B * C * 2; -} - -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -RuntimeSettings: - DirectX: - RootParameters: - - Kind: Constant - Name: Root - - Kind: DescriptorTable - - Kind: RootDescriptor - Resource: - Name: Out2 - Kind: RWStructuredBuffer 
-Buffers: - - Name: Root - Format: Float32 - Data: [ 2, 4, 6, 8 ] - - Name: In - Format: Float32 - Stride: 32 - Data: [ 2, 4, 6, 8, 10, 12, 14, 16] - - Name: Out1 - Format: Float32 - Channels: 4 - ZeroInitSize: 16 - - Name: Out2 - Format: Float32 - Channels: 4 - ZeroInitSize: 16 -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 4294967294 - Space: 0 - - Name: Out1 - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 4294967279 -... -#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s -# RUN: obj2yaml %t.o | FileCheck %s --check-prefix=OBJ - -# CHECK-LABEL: Name: Out1 -# CHECK: Data: [ 40, 192, 504, 1024 ] -# CHECK-LABEL: Name: Out2 -# CHECK: Data: [ 80, 384, 1008, 2048 ] - -## Root Signature Header -# OBJ: - Name: RTS0 -# OBJ-NEXT: Size: 140 -# OBJ-NEXT: RootSignature: -# OBJ-NEXT: Version: 2 -# OBJ-NEXT: NumRootParameters: 3 -# OBJ-NEXT: RootParametersOffset: 24 -# OBJ-NEXT: NumStaticSamplers: 0 -# OBJ-NEXT: StaticSamplersOffset: 140 - -# OBJ-NEXT: Parameters: - -## RootConstants(num32BitConstants = +61, b0) -# OBJ-NEXT: - ParameterType: 1 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Constants: -## Check positively signed integer -# OBJ-NEXT: Num32BitValues: 61 -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: ShaderRegister: 0 - -## DescriptorTable -# OBJ-NEXT: - ParameterType: 0 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Table: -# OBJ-NEXT: NumRanges: 2 -# OBJ-NEXT: RangesOffset: 80 -# OBJ-NEXT: Ranges: - -## SRV(t4294967294) -# OBJ-NEXT: - RangeType: 0 -# OBJ-NEXT: NumDescriptors: 1 -## Check edge-case -# OBJ-NEXT: BaseShaderRegister: 4294967294 -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 - -## UAV(u1, space = 4294967279) -# OBJ: - RangeType: 1 -# OBJ-NEXT: NumDescriptors: 1 -# OBJ-NEXT: BaseShaderRegister: 1 -## Check edge-case -# OBJ-NEXT: RegisterSpace: 
4294967279 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 - -## UAV(u2) -# OBJ: - ParameterType: 4 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Descriptor: -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: ShaderRegister: 2 diff --git a/test/Feature/RootSignatures/ParameterInsensitivity.test b/test/Feature/RootSignatures/ParameterInsensitivity.test deleted file mode 100644 index 84d4674d..00000000 --- a/test/Feature/RootSignatures/ParameterInsensitivity.test +++ /dev/null @@ -1,118 +0,0 @@ -#--- source.hlsl - -cbuffer RootConstants : register(b0, space2) { - float4 C; -}; - -struct Input { - float4 A; - float4 B; -}; - -struct Output { - float4 A; -}; - -StructuredBuffer In : register(t0); -RWStructuredBuffer Out : register(u1); - -// Root signature to demonstrate: -// - All keywords and enums are case in-sensitive -// - Registers are case sensitive -// - Mandatory and optional arguments can be specified in any order - -#define RootSig \ - "rootflags(allow_input_assembler_input_layout)," \ - "rootconstants(space = 2, b0, num32bitconstants = 4), " \ - "DESCRIPTORTABLE( " \ - " SRV(FLAGS = DATA_STATIC, t0, SPACE = 0), " \ - " VisibilitY = ShadeR_VisibilitY_AlL, " \ - " uav(u1, NumDescriptors = UNBOUNDED) " \ - ")" - -[RootSignature(RootSig)] -[numthreads(1,1,1)] -void main(uint GI : SV_GroupIndex) { - Out[GI].A = In[GI].A * In[GI].B * C; -} - -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -RuntimeSettings: - DirectX: - RootParameters: - - Kind: Constant - Name: Root - - Kind: DescriptorTable -Buffers: - - Name: Root - Format: Float32 - Data: [ 2, 4, 6, 8 ] - - Name: In - Format: Float32 - Stride: 32 - Data: [ 2, 4, 6, 8, 10, 12, 14, 16] - - Name: Out - Format: Float32 - Channels: 4 - ZeroInitSize: 16 -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 -... 
-#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s -# RUN: obj2yaml %t.o | FileCheck %s --check-prefix=OBJ - -# CHECK-LABEL: Name: Out -# CHECK: Data: [ 40, 192, 504, 1024 ] - -# OBJ: - Name: RTS0 -# OBJ-NEXT: Size: 116 -# OBJ-NEXT: RootSignature: -# OBJ-NEXT: Version: 2 -# OBJ-NEXT: NumRootParameters: 2 -# OBJ-NEXT: RootParametersOffset: 24 -# OBJ-NEXT: NumStaticSamplers: 0 -# OBJ-NEXT: StaticSamplersOffset: 116 -# OBJ-NEXT: Parameters: -# OBJ-NEXT: - ParameterType: 1 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Constants: -# OBJ-NEXT: Num32BitValues: 4 -# OBJ-NEXT: RegisterSpace: 2 -# OBJ-NEXT: ShaderRegister: 0 -# OBJ-NEXT: - ParameterType: 0 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Table: -# OBJ-NEXT: NumRanges: 2 -# OBJ-NEXT: RangesOffset: 68 -# OBJ-NEXT: Ranges: -# OBJ-NEXT: - RangeType: 0 -# OBJ-NEXT: NumDescriptors: 1 -# OBJ-NEXT: BaseShaderRegister: 0 -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 -# OBJ-NEXT: DATA_STATIC: true -# OBJ-NEXT: - RangeType: 1 -# OBJ-NEXT: NumDescriptors: -1 -# OBJ-NEXT: BaseShaderRegister: 1 -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 -# OBJ: AllowInputAssemblerInputLayout: true diff --git a/test/Feature/RootSignatures/RootConstants.test b/test/Feature/RootSignatures/RootConstants.test deleted file mode 100644 index 2d035b6e..00000000 --- a/test/Feature/RootSignatures/RootConstants.test +++ /dev/null @@ -1,97 +0,0 @@ -#--- source.hlsl - -cbuffer RootConstants : register(b2) { - float4 C; -}; - -struct Input { - float4 A; - float4 B; -}; - -StructuredBuffer In : register(t2, space0); -RWBuffer Out : register(u1, space4); - -[RootSignature("RootConstants(b2, num32BitConstants = 4), DescriptorTable(SRV(t2), UAV(u1, space=4))")] -[numthreads(1,1,1)] -void main(uint GI : SV_GroupIndex) { - Out[GI] = In[GI].A * In[GI].B * C; -} -//--- 
pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -RuntimeSettings: - DirectX: - RootParameters: - - Kind: Constant - Name: Root - - Kind: DescriptorTable -Buffers: - - Name: Root - Format: Float32 - Data: [ 2, 4, 6, 8 ] - - Name: In - Format: Float32 - Stride: 32 - Data: [ 2, 4, 6, 8, 10, 12, 14, 16] - - Name: Out - Format: Float32 - Channels: 4 - ZeroInitSize: 16 -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - - Name: Out - Kind: RWBuffer - DirectXBinding: - Register: 1 - Space: 4 -... -#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s -# RUN: obj2yaml %t.o | FileCheck %s --check-prefix=OBJ - -# CHECK-LABEL: Name: Out -# CHECK: Data: [ 40, 192, 504, 1024 ] - -# OBJ: - Name: RTS0 -# OBJ-NEXT: Size: 116 -# OBJ-NEXT: RootSignature: -# OBJ-NEXT: Version: 2 -# OBJ-NEXT: NumRootParameters: 2 -# OBJ-NEXT: RootParametersOffset: 24 -# OBJ-NEXT: NumStaticSamplers: 0 -# OBJ-NEXT: StaticSamplersOffset: 116 -# OBJ-NEXT: Parameters: -# OBJ-NEXT: - ParameterType: 1 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Constants: -# OBJ-NEXT: Num32BitValues: 4 -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: ShaderRegister: 2 -# OBJ: - ParameterType: 0 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Table: -# OBJ-NEXT: NumRanges: 2 -# OBJ-NEXT: RangesOffset: 68 -# OBJ-NEXT: Ranges: -# OBJ: - RangeType: 0 -# OBJ-NEXT: NumDescriptors: 1 -# OBJ-NEXT: BaseShaderRegister: 2 -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 -# OBJ: - RangeType: 1 -# OBJ-NEXT: NumDescriptors: 1 -# OBJ-NEXT: BaseShaderRegister: 1 -# OBJ-NEXT: RegisterSpace: 4 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 diff --git a/test/Feature/RootSignatures/RootDescriptorAndTables.test b/test/Feature/RootSignatures/RootDescriptorAndTables.test deleted file mode 100644 index 
a572600c..00000000 --- a/test/Feature/RootSignatures/RootDescriptorAndTables.test +++ /dev/null @@ -1,101 +0,0 @@ -#--- source.hlsl - -cbuffer RootConstants : register(b2) { - float4 C; -}; - -struct Input { - float4 A; - float4 B; -}; - -struct Output { - float4 A; -}; - -StructuredBuffer In : register(t2, space0); -RWStructuredBuffer Out : register(u1, space4); - -[RootSignature("CBV(b2), DescriptorTable(SRV(t2)), UAV(u1, space=4)")] -[numthreads(1,1,1)] -void main(uint GI : SV_GroupIndex) { - Out[GI].A = In[GI].A * In[GI].B * C; -} -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -RuntimeSettings: - DirectX: - RootParameters: - - Kind: RootDescriptor - Resource: - Name: cbuffer - Kind: ConstantBuffer - - Kind: DescriptorTable - - Kind: RootDescriptor - Resource: - Name: Out - Kind: RWStructuredBuffer -Buffers: - - Name: cbuffer - Format: Float32 - Data: [ 2, 4, 6, 8 ] - - Name: In - Format: Float32 - Stride: 32 - Data: [ 2, 4, 6, 8, 10, 12, 14, 16] - - Name: Out - Format: Float32 - Channels: 4 - ZeroInitSize: 16 -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 -... 
-#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s -# RUN: obj2yaml %t.o | FileCheck %s --check-prefix=OBJ - -# CHECK-LABEL: Name: Out -# CHECK: Data: [ 40, 192, 504, 1024 ] - -# OBJ: - Name: RTS0 -# OBJ-NEXT: Size: 116 -# OBJ-NEXT: RootSignature: -# OBJ-NEXT: Version: 2 -# OBJ-NEXT: NumRootParameters: 3 -# OBJ-NEXT: RootParametersOffset: 24 -# OBJ-NEXT: NumStaticSamplers: 0 -# OBJ-NEXT: StaticSamplersOffset: 116 -# OBJ-NEXT: Parameters: -# OBJ-NEXT: - ParameterType: 2 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Descriptor: -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: ShaderRegister: 2 -# OBJ: - ParameterType: 0 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Table: -# OBJ-NEXT: NumRanges: 1 -# OBJ-NEXT: RangesOffset: 80 -# OBJ-NEXT: Ranges: -# OBJ-NEXT: - RangeType: 0 -# OBJ-NEXT: NumDescriptors: 1 -# OBJ-NEXT: BaseShaderRegister: 2 -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 -# OBJ: - ParameterType: 4 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Descriptor: -# OBJ-NEXT: RegisterSpace: 4 -# OBJ-NEXT: ShaderRegister: 1 diff --git a/test/Feature/RootSignatures/RootDescriptors.test b/test/Feature/RootSignatures/RootDescriptors.test deleted file mode 100644 index 1c54618d..00000000 --- a/test/Feature/RootSignatures/RootDescriptors.test +++ /dev/null @@ -1,92 +0,0 @@ -#--- source.hlsl - -cbuffer RootConstants : register(b2) { - float4 C; -}; - -struct Input { - float4 A; - float4 B; -}; - -struct Output { - float4 A; -}; - -StructuredBuffer In : register(t2, space0); -RWStructuredBuffer Out : register(u1, space4); - -[RootSignature("CBV(b2), SRV(t2), UAV(u1, space=4)")] -[numthreads(1,1,1)] -void main(uint GI : SV_GroupIndex) { - Out[GI].A = In[GI].A * In[GI].B * C; -} -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -RuntimeSettings: - DirectX: - RootParameters: - - Kind: 
RootDescriptor - Resource: - Name: cbuffer - Kind: ConstantBuffer - - Kind: RootDescriptor - Resource: - Name: In - Kind: StructuredBuffer - - Kind: RootDescriptor - Resource: - Name: Out - Kind: RWStructuredBuffer -Buffers: - - Name: cbuffer - Format: Float32 - Data: [ 2, 4, 6, 8 ] - - Name: In - Format: Float32 - Stride: 32 - Data: [ 2, 4, 6, 8, 10, 12, 14, 16] - - Name: Out - Format: Float32 - Channels: 4 - ZeroInitSize: 16 -DescriptorSets: [] -... -#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s -# RUN: obj2yaml %t.o | FileCheck %s --check-prefix=OBJ - -# CHECK-LABEL: Name: Out -# CHECK: Data: [ 40, 192, 504, 1024 ] - -# OBJ: - Name: RTS0 -# OBJ-NEXT: Size: 96 -# OBJ-NEXT: RootSignature: -# OBJ-NEXT: Version: 2 -# OBJ-NEXT: NumRootParameters: 3 -# OBJ-NEXT: RootParametersOffset: 24 -# OBJ-NEXT: NumStaticSamplers: 0 -# OBJ-NEXT: StaticSamplersOffset: 96 -# OBJ-NEXT: Parameters: -# OBJ-NEXT: - ParameterType: 2 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Descriptor: -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: ShaderRegister: 2 -# OBJ: - ParameterType: 3 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Descriptor: -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: ShaderRegister: 2 -# OBJ: - ParameterType: 4 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Descriptor: -# OBJ-NEXT: RegisterSpace: 4 -# OBJ-NEXT: ShaderRegister: 1 diff --git a/test/Feature/RootSignatures/TwoDescriptorTables.test b/test/Feature/RootSignatures/TwoDescriptorTables.test deleted file mode 100644 index be5b73c1..00000000 --- a/test/Feature/RootSignatures/TwoDescriptorTables.test +++ /dev/null @@ -1,104 +0,0 @@ -#--- source.hlsl - -struct Input { - float4 A; - float4 B; -}; - -StructuredBuffer In : register(t2, space0); -RWBuffer Out1 : register(u1, space4); -RWBuffer Out2 : register(u2, space4); - -[RootSignature("DescriptorTable(SRV(t2), UAV(u1, space=4)), DescriptorTable(UAV(u2, space=4, 
numdescriptors=1))")] -[numthreads(1,1,1)] -void main(uint GI : SV_GroupIndex) { - Out1[GI] = In[GI].A * In[GI].B; - Out2[GI] = In[GI].A * In[GI].B * 2; -} -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 32 - Data: [ 2, 4, 6, 8, 10, 12, 14, 16] - - Name: Out1 - Format: Float32 - Channels: 4 - ZeroInitSize: 16 - - Name: Out2 - Format: Float32 - Channels: 4 - ZeroInitSize: 16 -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - - Name: Out1 - Kind: RWBuffer - DirectXBinding: - Register: 1 - Space: 4 - - Resources: - - Name: Out2 - Kind: RWBuffer - DirectXBinding: - Register: 2 - Space: 4 -... -#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s -# RUN: obj2yaml %t.o | FileCheck %s --check-prefix=OBJ - -# CHECK: Data: -# CHECK-LABEL: Name: Out1 -# CHECK: Data: [ 20, 48, 84, 128 ] -# CHECK-LABEL: Name: Out2 -# CHECK: Data: [ 40, 96, 168, 256 ] - -# OBJ: - Name: RTS0 -# OBJ-NEXT: Size: 136 -# OBJ-NEXT: RootSignature: -# OBJ-NEXT: Version: 2 -# OBJ-NEXT: NumRootParameters: 2 -# OBJ-NEXT: RootParametersOffset: 24 -# OBJ-NEXT: NumStaticSamplers: 0 -# OBJ-NEXT: StaticSamplersOffset: 136 -# OBJ-NEXT: Parameters: -# OBJ-NEXT: - ParameterType: 0 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Table: -# OBJ-NEXT: NumRanges: 2 -# OBJ-NEXT: RangesOffset: 56 -# OBJ-NEXT: Ranges: -# OBJ: - RangeType: 0 -# OBJ-NEXT: NumDescriptors: 1 -# OBJ-NEXT: BaseShaderRegister: 2 -# OBJ-NEXT: RegisterSpace: 0 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 -# OBJ: - RangeType: 1 -# OBJ-NEXT: NumDescriptors: 1 -# OBJ-NEXT: BaseShaderRegister: 1 -# OBJ-NEXT: RegisterSpace: 4 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 -# OBJ: - ParameterType: 0 -# OBJ-NEXT: ShaderVisibility: 0 -# OBJ-NEXT: Table: -# OBJ-NEXT: NumRanges: 1 
-# OBJ-NEXT: RangesOffset: 112 -# OBJ-NEXT: Ranges: -# OBJ: - RangeType: 1 -# OBJ-NEXT: NumDescriptors: 1 -# OBJ-NEXT: BaseShaderRegister: 2 -# OBJ-NEXT: RegisterSpace: 4 -# OBJ-NEXT: OffsetInDescriptorsFromTableStart: 4294967295 diff --git a/test/Feature/RootSignatures/lit.local.cfg b/test/Feature/RootSignatures/lit.local.cfg deleted file mode 100644 index bc52ce2d..00000000 --- a/test/Feature/RootSignatures/lit.local.cfg +++ /dev/null @@ -1,2 +0,0 @@ -if 'DirectX' not in config.available_features: - config.unsupported = True diff --git a/test/Feature/StructuredBuffer/dec_counter.test b/test/Feature/StructuredBuffer/dec_counter.test deleted file mode 100644 index bf3ad88d..00000000 --- a/test/Feature/StructuredBuffer/dec_counter.test +++ /dev/null @@ -1,50 +0,0 @@ -#--- source.hlsl -RWStructuredBuffer Out : register(u0); - -[numthreads(1,1,1)] -void main(uint GI : SV_GroupIndex) { - Out.DecrementCounter(); - Out.DecrementCounter(); - Out.DecrementCounter(); - Out[GI] = Out.DecrementCounter(); -} - -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: Out - Format: Hex32 - Stride: 4 - ZeroInitSize: 4 -DescriptorSets: - - Resources: - - Name: Out - Kind: RWStructuredBuffer - HasCounter: true - DirectXBinding: - Register: 0 - Space: 0 -... 
-#--- end - -# Offload tests are missing support for counters on Vulcan -# https://github.com/llvm/offload-test-suite/issues/303 -# XFAIL: Vulkan - -# Offload tests are missing support for counters on Metal -# https://github.com/llvm/offload-test-suite/issues/304 -# XFAIL: Metal - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: Name: Out -# CHECK: Counter: 4294967292 -# CHECK: Data: [ -# CHECK: 0xFFFFFFFC -# CHECK: ] diff --git a/test/Feature/StructuredBuffer/inc_counter.test b/test/Feature/StructuredBuffer/inc_counter.test deleted file mode 100644 index 1079c0e9..00000000 --- a/test/Feature/StructuredBuffer/inc_counter.test +++ /dev/null @@ -1,50 +0,0 @@ -#--- source.hlsl -RWStructuredBuffer Out : register(u0); - -[numthreads(1,1,1)] -void main(uint GI : SV_GroupIndex) { - Out.IncrementCounter(); - Out.IncrementCounter(); - Out.IncrementCounter(); - Out[GI] = Out.IncrementCounter(); -} - -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: Out - Format: Hex32 - Stride: 4 - ZeroInitSize: 4 -DescriptorSets: - - Resources: - - Name: Out - Kind: RWStructuredBuffer - HasCounter: true - DirectXBinding: - Register: 0 - Space: 0 -... 
-#--- end - -# Offload tests are missing support for counters on Vulcan -# https://github.com/llvm/offload-test-suite/issues/303 -# XFAIL: Vulkan - -# Offload tests are missing support for counters on Metal -# https://github.com/llvm/offload-test-suite/issues/304 -# XFAIL: Metal - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: Name: Out -# CHECK: Counter: 4 -# CHECK: Data: [ -# CHECK: 0x3 -# CHECK: ] diff --git a/test/Feature/StructuredBuffer/inc_counter_array.test b/test/Feature/StructuredBuffer/inc_counter_array.test deleted file mode 100644 index 51188c33..00000000 --- a/test/Feature/StructuredBuffer/inc_counter_array.test +++ /dev/null @@ -1,73 +0,0 @@ -#--- source.hlsl - -// This test verifies handling of resource arrays when the -// resource type has a counter. - -RWStructuredBuffer Out[4] : register(u0); - -[numthreads(4,1,1)] -void main(uint GI : SV_GroupIndex) { - for (int i = 0; i < GI; i++) - Out[GI].IncrementCounter(); - - Out[GI][0] = Out[GI].IncrementCounter(); -} - -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: Out - Format: Hex32 - Stride: 4 - ArraySize: 4 - ZeroInitSize: 4 - -DescriptorSets: - - Resources: - - Name: Out - Kind: RWStructuredBuffer - HasCounter: true - DirectXBinding: - Register: 0 - Space: 0 -... 
-#--- end - -# Offload tests are missing support for counters on Vulkan -# https://github.com/llvm/offload-test-suite/issues/303 -# XFAIL: Vulkan - -# Offload tests are missing support for counters and resource arrays on Metal -# https://github.com/llvm/offload-test-suite/issues/304 -# https://github.com/llvm/offload-test-suite/issues/305 -# XFAIL: Metal - -# https://github.com/llvm/llvm-project/issues/154407 -# XFAIL: Clang-DirectX - -# WARP has an issue counters in resource arrays -# Internal issue #58567630 -# XFAIL: DirectX-WARP -# -# Intel has an issue with counters in resource arrays -# https://github.com/llvm/offload-test-suite/issues/376 -# XFAIL: DirectX-Intel - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: Creating UAV: { Size = 4100, Register = u0, Space = 0, HasCounter = 1 } -# CHECK: UAV: HeapIdx = 0 EltSize = 4 NumElts = 1 HasCounter = 1 - -# CHECK: Name: Out -# CHECK: Counters: [ 1, 2, 3, 4 ] -# CHECK: Data: - - [ 0x0 ] - - [ 0x1 ] - - [ 0x2 ] - - [ 0x3 ] diff --git a/test/Feature/StructuredBuffer/layout.test b/test/Feature/StructuredBuffer/layout.test deleted file mode 100644 index 4e8c2e12..00000000 --- a/test/Feature/StructuredBuffer/layout.test +++ /dev/null @@ -1,108 +0,0 @@ -#--- source.hlsl -struct A { - float a; - float b; - float c; -}; - -struct B { - float2 ab; - float c; -}; - -struct C { - float abc[3]; -}; - -StructuredBuffer In : register(t0); -RWStructuredBuffer BufA : register(u1); -RWStructuredBuffer BufB : register(u2); -RWStructuredBuffer BufC : register(u3); - -[numthreads(1,1,4)] -void main(uint GI : SV_GroupIndex) { - BufA[GI].a = In[GI].x; - BufA[GI].b = In[GI].y; - BufA[GI].c = In[GI].z; - BufB[GI].ab.x = In[GI].x; - BufB[GI].ab.y = In[GI].y; - BufB[GI].c = In[GI].z; - BufC[GI].abc[0] = In[GI].x; - BufC[GI].abc[1] = In[GI].y; - BufC[GI].abc[2] = In[GI].z; -} -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main 
- DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 32 - Data: [ 1, 2, 3, 0, 4, 5, 6, 0, 7, 8, 9, 0, 10, 11, 12, 0 ] - - Name: BufA - Format: Float32 - Stride: 12 - ZeroInitSize: 48 - - Name: BufB - Format: Float32 - Stride: 12 - ZeroInitSize: 48 - - Name: BufC - Format: Float32 - Stride: 12 - ZeroInitSize: 48 -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: BufA - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: BufB - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: BufC - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -... -#--- end - -# clang-dxc doesn't support the -fvk-use-scalar-layout flag yet -# UNSUPPORTED: Clang-Vulkan - -# RUN: split-file %s %t -# RUN: %if !Vulkan %{ %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl %} -# RUN: %if Vulkan %{ %dxc_target -T cs_6_0 -fspv-target-env=vulkan1.3 -fvk-use-scalar-layout -Fo %t.o %t/source.hlsl %} -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: Name: In -# CHECK: Data: [ - -# CHECK: Name: BufA -# CHECK: Data: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ] - -# CHECK: Name: BufB -# CHECK: Data: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ] - -# CHECK: Name: BufC -# CHECK: Data: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ] diff --git a/test/Feature/StructuredBuffer/packed.test b/test/Feature/StructuredBuffer/packed.test deleted file mode 100644 index a8427089..00000000 --- a/test/Feature/StructuredBuffer/packed.test +++ /dev/null @@ -1,48 +0,0 @@ -#--- source.hlsl -struct Doggo { - int3 Legs; - int TailState; - int2 Ears; -}; - -RWStructuredBuffer Buf; - -[numthreads(2,1,1)] -void main(uint GI : SV_GroupIndex) { - Doggo Fido = Buf[GI]; - if (Fido.TailState == 0) { - Fido.TailState = Fido.Legs.x + Fido.Legs.y + 
Fido.Legs.z; - } - Buf[GI] = Fido; -} -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: Buf - Format: Int32 - Stride: 24 - Data: [ 0, 1, 2, 0, 4, 0, 1, 2, 3, 0, 4, 0] -DescriptorSets: - - Resources: - - Name: Buf - Kind: RWStructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 -... -#--- end - -# UNSUPPORTED: Clang -# RUN: split-file %s %t -# RUN: %if !Vulkan %{ %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl %} -# RUN: %if Vulkan %{ %dxc_target -T cs_6_0 -fspv-target-env=vulkan1.3 -fvk-use-scalar-layout -Fo %t.o %t/source.hlsl %} -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - - -# CHECK: Data: [ 0, 1, 2, 3, 4, 0, 1, 2, 3, 6, 4, 0 ] diff --git a/test/Feature/StructuredBuffer/simple.test b/test/Feature/StructuredBuffer/simple.test deleted file mode 100644 index 7140e758..00000000 --- a/test/Feature/StructuredBuffer/simple.test +++ /dev/null @@ -1,82 +0,0 @@ -#--- source.hlsl -struct S1 { - int4 i; - float4 f; -}; -struct S2 { - float4 f; - int4 i; -}; - -StructuredBuffer In : register(t1); -RWStructuredBuffer Out : register(u0); - -[numthreads(1,1,1)] -void main(uint GI : SV_GroupIndex) { - Out[GI].f = In[GI].f; - Out[GI].i = In[GI].i; -} -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Hex32 - Stride: 32 - Data: [0x00000000, 0x00000001, 0x00000002, 0x00000003, - 0x00000000, 0x3f800000, 0x40000000, 0x40400000] - - Name: Out - Format: Hex32 - Stride: 32 - ZeroInitSize: 32 -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 -... 
-#--- end - -# UNSUPPORTED: Clang-Vulkan - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: Name: In -# CHECK: Data: [ -# CHECK: 0x0, -# CHECK: 0x1, -# CHECK: 0x2, -# CHECK: 0x3, -# CHECK: 0x0, -# CHECK: 0x3F800000, -# CHECK: 0x40000000, -# CHECK: 0x40400000 -# CHECK: ] - -# CHECK: Name: Out -# CHECK: Data: [ -# CHECK: 0x0, -# CHECK: 0x3F800000, -# CHECK: 0x40000000, -# CHECK: 0x40400000, -# CHECK: 0x0, -# CHECK: 0x1, -# CHECK: 0x2, -# CHECK: 0x3 -# CHECK: ] diff --git a/test/Feature/StructuredBuffer/srv.test b/test/Feature/StructuredBuffer/srv.test deleted file mode 100644 index 4267de31..00000000 --- a/test/Feature/StructuredBuffer/srv.test +++ /dev/null @@ -1,68 +0,0 @@ -#--- source.hlsl -struct S1 { - int4 i; -}; - -StructuredBuffer In : register(t0); -RWStructuredBuffer Out : register(u1); - -[numthreads(1,1,1)] -void main(uint GI : SV_GroupIndex) { - Out[GI].i = In[GI].i * 2.0; -} -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Hex32 - Stride: 16 - Data: [0x00000000, 0x00000001, 0x00000002, 0x00000003] - - Name: Out - Format: Hex32 - Stride: 16 - ZeroInitSize: 16 -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# https://github.com/llvm/llvm-project/issues/140739 -# UNSUPPORTED: Clang-Vulkan - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: Name: In -# CHECK: Data: [ -# CHECK: 0x0, -# CHECK: 0x1, -# CHECK: 0x2, -# CHECK: 0x3 -# CHECK: ] - -# CHECK: Name: Out -# CHECK: Data: [ -# CHECK: 0x0, -# CHECK: 0x2, -# CHECK: 0x4, -# CHECK: 0x6 -# CHECK: ] diff --git a/test/Feature/StructuredBuffer/stride.test b/test/Feature/StructuredBuffer/stride.test deleted file mode 100644 index 8b4ee854..00000000 --- a/test/Feature/StructuredBuffer/stride.test +++ /dev/null @@ -1,55 +0,0 @@ -#--- source.hlsl -struct S { - float4 i; -}; - -StructuredBuffer In : register(t0); -RWStructuredBuffer Out : register(u1); - -[numthreads(2,1,1)] -void main(uint GI : SV_GroupIndex) { - Out[GI].i = In[GI]; -} -//--- pipeline.yaml ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: In - Format: Float32 - Stride: 32 - Data: [ 1, 2, 3, 4, 5, 6, 7, 8 ] - - Name: Out - Format: Float32 - Stride: 4 - ZeroInitSize: 32 -DescriptorSets: - - Resources: - - Name: In - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: Name: In -# CHECK: Data: [ - -# CHECK: Name: Out -# CHECK: Data: [ 1, 2, 3, 4, 5, 6, 7, 8 ] diff --git a/test/Feature/WaveOps/WaveActiveAllTrue.test b/test/Feature/WaveOps/WaveActiveAllTrue.test deleted file mode 100644 index 4f851847..00000000 --- a/test/Feature/WaveOps/WaveActiveAllTrue.test +++ /dev/null @@ -1,68 +0,0 @@ -#--- source.hlsl -RWBuffer value; -RWStructuredBuffer Out : register(u1); - -[numthreads(4, 1, 1)] -void main(uint3 threadID : SV_DispatchThreadID) { - bool B1 = true; - switch (value[threadID.x]) { - case 0: // threads 0 and 1 - B1 = false; - Out[threadID.x] = WaveActiveAllTrue(B1); // Only threads 0 and 1 are active; result should be false; - break; - case 2: // thread 2 - Out[threadID.x] = WaveActiveAllTrue(B1); // only thread 2 is active; result should be true; - break; - default: // thread 3 - Out[threadID.x] = WaveActiveAllTrue(B1); // only thread 3 is active; result should be true; - break; - } - Out[threadID.x + 4] = WaveActiveAllTrue(B1); // false because its false for threads 0 and 1 -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: value - Format: Int32 - Data: [ 0, 0, 1, 2] - - Name: Out - Format: UInt32 - Stride: 4 - ZeroInitSize: 32 - - Name: ExpectedOut - Format: UInt32 - Stride: 4 - Data: [0, 0, 1, 1, 0, 0, 0, 0] -Results: - - Result: Test - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: value - Kind: RWBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/WaveOps/WaveActiveAnyTrue.test b/test/Feature/WaveOps/WaveActiveAnyTrue.test deleted file mode 100644 index 07af08c1..00000000 --- a/test/Feature/WaveOps/WaveActiveAnyTrue.test +++ /dev/null @@ -1,67 +0,0 @@ -#--- source.hlsl -RWBuffer value; -RWStructuredBuffer Out : register(u1); - -[numthreads(4, 1, 1)] -void main(uint3 threadID : SV_DispatchThreadID) { - bool B1 = false; - switch (value[threadID.x]) { - case 0: - case 2: // threads 0, 1, 2; result for each false - Out[threadID.x] = WaveActiveAnyTrue(B1); - B1 = true; - break; - default: // thread 3; result is false - Out[threadID.x] = WaveActiveAnyTrue(B1); - break; - } - // result for all threads is true because B1 is true for threads 0-2 - Out[threadID.x + 4] = WaveActiveAnyTrue(B1); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: value - Format: Int32 - Data: [ 0, 0, 1, 2] - - Name: Out - Format: UInt32 - Stride: 4 - ZeroInitSize: 32 - - Name: ExpectedOut - Format: UInt32 - Stride: 4 - Data: [0, 0, 0, 0, 1, 1, 1, 1] -Results: - - Result: Test - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: value - Kind: RWBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/WaveOps/WaveActiveCountBits.test b/test/Feature/WaveOps/WaveActiveCountBits.test deleted file mode 100644 index a8e289c9..00000000 --- a/test/Feature/WaveOps/WaveActiveCountBits.test +++ /dev/null @@ -1,70 +0,0 @@ -#--- source.hlsl -RWBuffer value; -RWStructuredBuffer Out : register(u1); - -[numthreads(4, 1, 1)] -void main(uint3 threadID : SV_DispatchThreadID) { - bool B1 = false; - - switch (value[threadID.x]) { - case 0: // threads 0 and 1; result is number of active lanes (2) - Out[threadID.x + 4] = WaveActiveCountBits(true); // threads 0 and 1 - case 2: - B1 = true; // set b1 to true for thread 3 - break; - default: - Out[threadID.x + 4] = WaveActiveCountBits(false); // thread 2; expect 0 - break; - } - // should be 3 because B1 set to true for threads 0,1, and 3. - uint Count = WaveActiveCountBits(B1); - Out[threadID.x] = Count; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: value - Format: Int32 - Data: [ 0, 0, 1, 2] - - Name: Out - Format: UInt32 - Stride: 4 - ZeroInitSize: 28 - - Name: ExpectedOut - Format: UInt32 - Stride: 4 - Data: [3, 3, 3, 3, 2, 2, 0] -Results: - - Result: Test - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: value - Kind: RWBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o - diff --git a/test/Feature/WaveOps/WaveGetLaneIndex.test b/test/Feature/WaveOps/WaveGetLaneIndex.test deleted file mode 100644 index 4efa3752..00000000 --- a/test/Feature/WaveOps/WaveGetLaneIndex.test +++ /dev/null @@ -1,44 +0,0 @@ -#--- source.hlsl -RWStructuredBuffer Out : register(u0); - -[numthreads(4, 1, 1)] -void main(uint3 threadID : SV_DispatchThreadID) { - Out[threadID.x] = WaveGetLaneIndex(); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [2, 1, 1] -Buffers: - - Name: Out - Format: UInt32 - Stride: 4 - ZeroInitSize: 32 - - Name: ExpectedOut - Format: UInt32 - Stride: 4 - Data: [0, 1, 2, 3, 0, 1, 2, 3] -Results: - - Result: Test - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 -... 
-#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/WaveOps/WaveIsFirstLane.test b/test/Feature/WaveOps/WaveIsFirstLane.test deleted file mode 100644 index 312222c1..00000000 --- a/test/Feature/WaveOps/WaveIsFirstLane.test +++ /dev/null @@ -1,72 +0,0 @@ -#--- source.hlsl -RWBuffer value; -RWStructuredBuffer Out : register(u1); - -[numthreads(4, 1, 1)] -void main(uint3 threadID : SV_DispatchThreadID) { - uint tmp = 0xFF; - switch (value[threadID.x]) { - case 0: - tmp = WaveIsFirstLane(); // threads 0 and 1; 0 is first for both - break; - case 2: - tmp = WaveIsFirstLane(); // thread 3; 3 is first - break; - default: - tmp = WaveIsFirstLane(); // thread 2; 2 is first - break; - } - // Using a temporary value that is wave-divergent here seems to hit a driver - // bug in the NV 50-series GPUs - Out[threadID.x] = tmp; - Out[threadID.x + 4] = WaveIsFirstLane(); // 0 is first for all -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: value - Format: Int32 - Data: [ 0, 0, 1, 2] - - Name: Out - Format: UInt32 - Stride: 4 - ZeroInitSize: 32 - - Name: ExpectedOut - Format: UInt32 - Stride: 4 - Data: [1, 0, 1, 1, 1, 0, 0, 0] -Results: - - Result: Test - Rule: BufferExact - Actual: Out - Expected: ExpectedOut -DescriptorSets: - - Resources: - - Name: value - Kind: RWBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# XFAIL: NV-Reconvergence-Issue-320 - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Unit/lit.site.cfg.py.in b/test/Unit/lit.site.cfg.py.in index c47da732..a5416bab 100644 --- a/test/Unit/lit.site.cfg.py.in +++ b/test/Unit/lit.site.cfg.py.in @@ -28,4 +28,3 @@ config.test_format = lit.formats.GoogleTest( "Tests", run_under=config.gtest_run_under, ) - diff --git a/test/WaveOps/WaveActiveMax.test b/test/WaveOps/WaveActiveMax.test index ba50a77c..ae43f5fc 100644 --- a/test/WaveOps/WaveActiveMax.test +++ b/test/WaveOps/WaveActiveMax.test @@ -1,106 +1,106 @@ -#--- source.hlsl -RWStructuredBuffer Nans : register(u0); -RWStructuredBuffer Infs : register(u1); -RWStructuredBuffer NegInfs : register(u2); -RWStructuredBuffer Mix : register(u3); - -[numthreads(32,1,1)] -void main(uint3 TID : SV_GroupThreadID) { - Nans[TID.x % 8] = WaveActiveMax(Nans[TID.x % 8]); - Infs[TID.x % 8] = WaveActiveMax(Infs[TID.x % 8]); - NegInfs[TID.x % 8] = WaveActiveMax(NegInfs[TID.x % 8]); - Mix[TID.x % 8] = WaveActiveMax(Mix[TID.x % 8]); -} -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: Nans - Format: Float32 - Data: [ nan, nan, nan, nan ] - - Name: Infs - Format: Float32 - Data: [ inf, inf, inf, inf ] - - Name: NegInfs - Format: Float32 - Data: [ -inf, -inf, -inf, -inf ] - - Name: Mix - Format: Float32 - Data: [ inf, -inf, nan, 0 ] -DescriptorSets: - - Resources: - - Name: Nans - Kind: RWStructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: Infs - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 - - Name: NegInfs - Kind: RWStructuredBuffer - DirectXBinding: - Register: 2 - Space: 0 - VulkanBinding: - Binding: 2 - - Name: Mix - Kind: RWStructuredBuffer - DirectXBinding: - Register: 3 - Space: 0 - VulkanBinding: - Binding: 3 -... 
- -#--- end - -# RUN: split-file %s %t -# RUN: %if !Vulkan %{ %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl %} -# RUN: %if Vulkan %{ %dxc_target -T cs_6_0 -fspv-target-env=vulkan1.2 -Fo %t.o %t/source.hlsl %} -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# The behavior of this operation is consistent on Metal, so the test verifies that behavior. - -# The SPIR-V Spec for OpGroupNonUniformFMax says: -# > From the set of Value(s) provided by active invocations within a subgroup, -# > if for any two Values one of them is a NaN, the other is chosen. If all -# > Value(s) that are used by the current invocation are NaN, then the result is -# > an undefined value. - -# This makes Vulkan undefined for cases where all values are nan. - -# Also SPIR-V states: -# > The identity I for Operation is -INF. - -# This makes it defined that any lane value of -INF is ignored. - -# DirectX driver implementations seem to match SPIR-V, except WARP, which does -# not treat -INF as an identity. - -# XFAIL: DirectX-WARP - -# CHECK: Name: Nans -# CHECK-NEXT: Format: Float32 -# METAL-NEXT: Data: [ 0, 0, 0, 0 ] -# DX-NEXT: Data: -# VULKAN-NEXT: Data: -# CHECK: Name: Infs -# CHECK-NEXT: Format: Float32 -# CHECK-NEXT: Data: [ inf, inf, inf, inf ] -# CHECK: Name: NegInfs -# CHECK-NEXT: Format: Float32 -# CHECK-NEXT: Data: [ 0, 0, 0, 0 ] -# CHECK: Name: Mix -# CHECK-NEXT: Format: Float32 -# CHECK-NEXT: Data: [ inf, inf, inf, inf ] +#--- source.hlsl +RWStructuredBuffer Nans : register(u0); +RWStructuredBuffer Infs : register(u1); +RWStructuredBuffer NegInfs : register(u2); +RWStructuredBuffer Mix : register(u3); + +[numthreads(32,1,1)] +void main(uint3 TID : SV_GroupThreadID) { + Nans[TID.x % 8] = WaveActiveMax(Nans[TID.x % 8]); + Infs[TID.x % 8] = WaveActiveMax(Infs[TID.x % 8]); + NegInfs[TID.x % 8] = WaveActiveMax(NegInfs[TID.x % 8]); + Mix[TID.x % 8] = WaveActiveMax(Mix[TID.x % 8]); +} +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] 
+Buffers: + - Name: Nans + Format: Float32 + Data: [ nan, nan, nan, nan ] + - Name: Infs + Format: Float32 + Data: [ inf, inf, inf, inf ] + - Name: NegInfs + Format: Float32 + Data: [ -inf, -inf, -inf, -inf ] + - Name: Mix + Format: Float32 + Data: [ inf, -inf, nan, 0 ] +DescriptorSets: + - Resources: + - Name: Nans + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: Infs + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: NegInfs + Kind: RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: Mix + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 +... + +#--- end + +# RUN: split-file %s %t +# RUN: %if !Vulkan %{ %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl %} +# RUN: %if Vulkan %{ %dxc_target -T cs_6_0 -fspv-target-env=vulkan1.2 -Fo %t.o %t/source.hlsl %} +# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s + +# The behavior of this operation is consistent on Metal, so the test verifies that behavior. + +# The SPIR-V Spec for OpGroupNonUniformFMax says: +# > From the set of Value(s) provided by active invocations within a subgroup, +# > if for any two Values one of them is a NaN, the other is chosen. If all +# > Value(s) that are used by the current invocation are NaN, then the result is +# > an undefined value. + +# This makes Vulkan undefined for cases where all values are nan. + +# Also SPIR-V states: +# > The identity I for Operation is -INF. + +# This makes it defined that any lane value of -INF is ignored. + +# DirectX driver implementations seem to match SPIR-V, except WARP, which does +# not treat -INF as an identity. 
+ +# XFAIL: DirectX-WARP + +# CHECK: Name: Nans +# CHECK-NEXT: Format: Float32 +# METAL-NEXT: Data: [ 0, 0, 0, 0 ] +# DX-NEXT: Data: +# VULKAN-NEXT: Data: +# CHECK: Name: Infs +# CHECK-NEXT: Format: Float32 +# CHECK-NEXT: Data: [ inf, inf, inf, inf ] +# CHECK: Name: NegInfs +# CHECK-NEXT: Format: Float32 +# CHECK-NEXT: Data: [ 0, 0, 0, 0 ] +# CHECK: Name: Mix +# CHECK-NEXT: Format: Float32 +# CHECK-NEXT: Data: [ inf, inf, inf, inf ] diff --git a/test/WaveOps/WaveReadLaneAt.Bool.test b/test/WaveOps/WaveReadLaneAt.Bool.test index cd76de75..82953566 100644 --- a/test/WaveOps/WaveReadLaneAt.Bool.test +++ b/test/WaveOps/WaveReadLaneAt.Bool.test @@ -1,66 +1,66 @@ -#--- source.hlsl - -StructuredBuffer InBool : register(t0); -RWStructuredBuffer OutBool : register(u1); - - -[numthreads(4,1,1)] -void main(uint32_t3 TID : SV_GroupThreadID) { - uint OutIdx = TID.x * 3; - - // Bool - OutBool[OutIdx] = WaveReadLaneAt(InBool[TID.x], TID.x); - bool4 ThreadInBool = {InBool[TID.x].xyz, InBool[TID.x].w}; - OutBool[OutIdx + 1] = WaveReadLaneAt(ThreadInBool, TID.x); - OutBool[OutIdx + 2].xy = WaveReadLaneAt(InBool[TID.x].xy, TID.x); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [4, 1, 1] -Buffers: - - Name: InBool - Format: Bool - Stride: 16 - Data: [ 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1 ] - - Name: OutBool - Format: Bool - Stride: 16 - ZeroInitSize: 144 - - Name: ExpectedOutBool # The result we expect - Format: Bool - Stride: 16 - Data: [ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0 ] -Results: - - Result: TestBool - Rule: BufferExact - Actual: OutBool - Expected: ExpectedOutBool -DescriptorSets: - - Resources: - - Name: InBool - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: OutBool - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end -# https://github.com/llvm/llvm-project/issues/140824 -# XFAIL: Clang - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o +#--- source.hlsl + +StructuredBuffer InBool : register(t0); +RWStructuredBuffer OutBool : register(u1); + + +[numthreads(4,1,1)] +void main(uint32_t3 TID : SV_GroupThreadID) { + uint OutIdx = TID.x * 3; + + // Bool + OutBool[OutIdx] = WaveReadLaneAt(InBool[TID.x], TID.x); + bool4 ThreadInBool = {InBool[TID.x].xyz, InBool[TID.x].w}; + OutBool[OutIdx + 1] = WaveReadLaneAt(ThreadInBool, TID.x); + OutBool[OutIdx + 2].xy = WaveReadLaneAt(InBool[TID.x].xy, TID.x); +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [4, 1, 1] +Buffers: + - Name: InBool + Format: Bool + Stride: 16 + Data: [ 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1 ] + - Name: OutBool + Format: Bool + Stride: 16 + ZeroInitSize: 144 + - Name: ExpectedOutBool # The result we expect + Format: Bool + Stride: 16 + Data: [ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0 ] +Results: + - Result: TestBool + Rule: BufferExact + Actual: OutBool + Expected: ExpectedOutBool +DescriptorSets: + - Resources: + - Name: InBool + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: OutBool + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end +# https://github.com/llvm/llvm-project/issues/140824 +# XFAIL: Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveReadLaneAt.Float.64.test b/test/WaveOps/WaveReadLaneAt.Float.64.test index caae8144..abe274c9 100644 --- a/test/WaveOps/WaveReadLaneAt.Float.64.test +++ b/test/WaveOps/WaveReadLaneAt.Float.64.test @@ -1,66 +1,66 @@ -#--- source.hlsl - -StructuredBuffer InFloat : register(t4); -RWStructuredBuffer OutFloat : register(u5); - - -[numthreads(4,1,1)] -void main(uint32_t3 TID : SV_GroupThreadID) { - uint OutIdx = TID.x * 3; - - // Float - OutFloat[OutIdx] = WaveReadLaneAt(InFloat[TID.x], TID.x); - float64_t4 ThreadInFloat = {InFloat[TID.x].xyz, InFloat[TID.x].w}; - OutFloat[OutIdx + 1] = WaveReadLaneAt(ThreadInFloat, TID.x);; - OutFloat[OutIdx + 2].xy = WaveReadLaneAt(InFloat[TID.x].xy, TID.x); -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [4, 1, 1] -Buffers: - - Name: InFloat - Format: Float64 - Stride: 16 - Data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] - - Name: OutFloat - Format: Float64 - Stride: 16 - ZeroInitSize: 288 - - Name: ExpectedOutFloat # The result we expect - Format: Float64 - Stride: 16 - Data: [ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 0, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 0, 0, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 0, 0 ] -Results: - - Result: TestFloat - Rule: BufferExact - Actual: OutFloat - Expected: ExpectedOutFloat -DescriptorSets: - - Resources: - - Name: InFloat - Kind: StructuredBuffer - DirectXBinding: - Register: 4 - Space: 0 - VulkanBinding: - Binding: 4 - - Name: OutFloat - Kind: RWStructuredBuffer - DirectXBinding: - Register: 5 - Space: 0 - VulkanBinding: - Binding: 5 -... 
-#--- end - -# REQUIRES: Double - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o +#--- source.hlsl + +StructuredBuffer InFloat : register(t4); +RWStructuredBuffer OutFloat : register(u5); + + +[numthreads(4,1,1)] +void main(uint32_t3 TID : SV_GroupThreadID) { + uint OutIdx = TID.x * 3; + + // Float + OutFloat[OutIdx] = WaveReadLaneAt(InFloat[TID.x], TID.x); + float64_t4 ThreadInFloat = {InFloat[TID.x].xyz, InFloat[TID.x].w}; + OutFloat[OutIdx + 1] = WaveReadLaneAt(ThreadInFloat, TID.x);; + OutFloat[OutIdx + 2].xy = WaveReadLaneAt(InFloat[TID.x].xy, TID.x); +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [4, 1, 1] +Buffers: + - Name: InFloat + Format: Float64 + Stride: 16 + Data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] + - Name: OutFloat + Format: Float64 + Stride: 16 + ZeroInitSize: 288 + - Name: ExpectedOutFloat # The result we expect + Format: Float64 + Stride: 16 + Data: [ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 0, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 0, 0, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 0, 0 ] +Results: + - Result: TestFloat + Rule: BufferExact + Actual: OutFloat + Expected: ExpectedOutFloat +DescriptorSets: + - Resources: + - Name: InFloat + Kind: StructuredBuffer + DirectXBinding: + Register: 4 + Space: 0 + VulkanBinding: + Binding: 4 + - Name: OutFloat + Kind: RWStructuredBuffer + DirectXBinding: + Register: 5 + Space: 0 + VulkanBinding: + Binding: 5 +... 
+#--- end + +# REQUIRES: Double + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveOps/WaveReadLaneAt.divergent.test b/test/WaveOps/WaveReadLaneAt.divergent.test index 65b60e52..2f1a9c8f 100644 --- a/test/WaveOps/WaveReadLaneAt.divergent.test +++ b/test/WaveOps/WaveReadLaneAt.divergent.test @@ -1,71 +1,71 @@ -#--- source.hlsl - -StructuredBuffer InInt : register(t0); -RWStructuredBuffer OutInt : register(u1); - -[numthreads(4,1,1)] -void main(uint3 TID : SV_GroupThreadID) { - - int offset = 2; - if (TID.x < 2){ - offset = 0; - } - switch (offset) { - case 0: - OutInt[TID.x] = WaveReadLaneAt(InInt[TID.x], (TID.x + 1) % 2); - break; - case 2: - OutInt[TID.x] = WaveReadLaneAt(InInt[TID.x], offset + ((TID.x - 1 + offset) % 2)); - break; - } -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [16, 1, 1] -Buffers: - - Name: InInt - Format: Int32 - Stride: 4 - Data: [0, 1, 2, 3 ] - - Name: OutInt - Format: Int32 - Stride: 4 - ZeroInitSize: 16 - - Name: ExpectedOutInt - Format: Int32 - Stride: 4 - Data: [ 1, 0, 3, 2] - -Results: - - Result: TestOut - Rule: BufferExact - Actual: OutInt - Expected: ExpectedOutInt - -DescriptorSets: - - Resources: - - Name: InInt - Kind: StructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 - - Name: OutInt - Kind: RWStructuredBuffer - DirectXBinding: - Register: 1 - Space: 0 - VulkanBinding: - Binding: 1 -... 
-#--- end - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o +#--- source.hlsl + +StructuredBuffer InInt : register(t0); +RWStructuredBuffer OutInt : register(u1); + +[numthreads(4,1,1)] +void main(uint3 TID : SV_GroupThreadID) { + + int offset = 2; + if (TID.x < 2){ + offset = 0; + } + switch (offset) { + case 0: + OutInt[TID.x] = WaveReadLaneAt(InInt[TID.x], (TID.x + 1) % 2); + break; + case 2: + OutInt[TID.x] = WaveReadLaneAt(InInt[TID.x], offset + ((TID.x - 1 + offset) % 2)); + break; + } +} + +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [16, 1, 1] +Buffers: + - Name: InInt + Format: Int32 + Stride: 4 + Data: [0, 1, 2, 3 ] + - Name: OutInt + Format: Int32 + Stride: 4 + ZeroInitSize: 16 + - Name: ExpectedOutInt + Format: Int32 + Stride: 4 + Data: [ 1, 0, 3, 2] + +Results: + - Result: TestOut + Rule: BufferExact + Actual: OutInt + Expected: ExpectedOutInt + +DescriptorSets: + - Resources: + - Name: InInt + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: OutInt + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize/waveSize.test b/test/WaveSize/waveSize.test new file mode 100644 index 00000000..1cb7865f --- /dev/null +++ b/test/WaveSize/waveSize.test @@ -0,0 +1,74 @@ +#--- source.hlsl +RWStructuredBuffer _participant_check_sum : register(u1); +RWStructuredBuffer _participant_bit : register(u2); + +[numthreads(256, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + _participant_check_sum[tid.x] = WaveActiveSum(1); + _participant_bit[tid.x] = WaveActiveMax(tid.x); +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_check_sum + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 64 + - Name: expected_participants + Format: UInt32 + Stride: 4 + Data: [0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 512 + - Name: _index + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 4 +Results: + - Result: WaveOpValidation + Rule: BufferExact + Actual: _participant_check_sum + Expected: expected_participants + - Result: WaveOpParticipants + Rule: BufferExact + Actual: _participant_bit + Expected: expected_participants +DescriptorSets: + - Resources: + - Name: _participant_check_sum + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 + - Name: _index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o \ No newline at end of file diff --git a/test/WaveSize16BitTracking/tests/program_1756573780637762139_1_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756573780637762139_1_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cae89eed --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756573780637762139_1_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,189 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 14)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 2432, 32768, 0, 4480, 32768, 0, 5120, 8, 0, 6016, 2080, 0, 6016, 2080, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 2432, 32768, 0, 4480, 32768, 0, 5120, 8, 0, 6016, 2080, 0, 6016, 2080, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756573781128741453_2_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756573781128741453_2_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..808d8a4b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756573781128741453_2_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,187 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((24 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8))) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((96 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2948, 33920, 0, 2948, 33920, 0, 2948, 33920, 0, 2952, 33920, 0, 2952, 33920, 0, 2952, 33920, 0, 2964, 33920, 0, 2964, 33920, 0, 2964, 33920, 0, 2968, 33920, 0, 2968, 33920, 0, 2968, 33920, 0, 2980, 33920, 0, 2980, 33920, 0, 2980, 33920, 0, 2984, 33920, 0, 2984, 33920, 0, 2984, 33920, 0, 3392, 1, 0, 3408, 1, 0, 3424, 1, 0, 7232, 73, 0, 7232, 73, 0, 7232, 73, 0, 7808, 1040, 0, 7808, 1040, 0, 8128, 18724, 0, 8128, 18724, 0, 8128, 18724, 0, 8128, 18724, 0, 8128, 18724, 0, 2948, 33920, 0, 2948, 33920, 0, 2948, 33920, 0, 2952, 33920, 0, 2952, 33920, 0, 2952, 33920, 0, 2964, 33920, 0, 2964, 33920, 0, 2964, 33920, 0, 2968, 33920, 0, 2968, 33920, 0, 2968, 33920, 0, 2980, 33920, 0, 2980, 33920, 0, 2980, 33920, 0, 2984, 33920, 0, 2984, 
33920, 0, 2984, 33920, 0, 3392, 1, 0, 3408, 1, 0, 3424, 1, 0, 7232, 73, 0, 7232, 73, 0, 7232, 73, 0, 7808, 1040, 0, 7808, 1040, 0, 8128, 18724, 0, 8128, 18724, 0, 8128, 18724, 0, 8128, 18724, 0, 8128, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756573791013611993_6_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756573791013611993_6_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..92333d16 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756573791013611993_6_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,390 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 5))) { + result = 
(result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((94 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((155 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14)) || 
(WaveGetLaneIndex() == 9))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((193 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((208 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((319 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((333 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((342 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((357 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (367 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 354 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 4096, 16388, 0, 4096, 16388, 0, 4112, 16388, 0, 4112, 16388, 0, 4128, 16388, 0, 4128, 16388, 0, 6020, 36, 0, 6020, 36, 0, 6024, 36, 0, 6024, 36, 0, 6036, 36, 0, 6036, 36, 0, 6040, 36, 0, 6040, 36, 0, 6052, 36, 0, 6052, 36, 0, 6056, 36, 0, 6056, 36, 0, 7616, 17, 0, 7616, 17, 0, 8832, 3, 0, 8832, 3, 0, 8848, 3, 0, 8848, 3, 0, 9920, 8738, 0, 9920, 8738, 0, 9920, 8738, 0, 9920, 8738, 0, 9924, 8738, 0, 9924, 8738, 0, 9924, 8738, 0, 9924, 8738, 0, 9936, 8738, 0, 9936, 8738, 0, 9936, 8738, 0, 9936, 8738, 0, 9940, 8738, 0, 9940, 8738, 0, 9940, 8738, 0, 9940, 8738, 0, 14336, 3, 0, 14336, 3, 0, 14352, 3, 0, 14352, 3, 0, 18944, 1040, 0, 18944, 1040, 0, 20416, 16384, 0, 20432, 16384, 0, 21312, 32, 0, 21328, 32, 0, 22848, 16384, 0, 22864, 16384, 0, 23488, 34952, 0, 23488, 34952, 0, 23488, 34952, 0, 23488, 34952, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 4096, 16388, 0, 4096, 16388, 0, 4112, 16388, 0, 4112, 16388, 0, 4128, 16388, 0, 4128, 16388, 0, 6020, 36, 0, 6020, 36, 0, 6024, 36, 0, 6024, 36, 0, 6036, 36, 0, 6036, 36, 0, 6040, 36, 0, 6040, 36, 0, 6052, 36, 0, 6052, 36, 0, 6056, 36, 0, 6056, 36, 0, 7616, 17, 0, 7616, 17, 0, 8832, 3, 0, 8832, 3, 0, 8848, 3, 0, 8848, 3, 0, 9920, 8738, 0, 9920, 8738, 0, 9920, 8738, 0, 9920, 8738, 0, 9924, 8738, 0, 9924, 8738, 0, 9924, 8738, 0, 9924, 8738, 0, 9936, 8738, 0, 9936, 8738, 0, 9936, 8738, 0, 9936, 8738, 0, 9940, 8738, 0, 9940, 8738, 0, 9940, 8738, 0, 9940, 8738, 0, 14336, 3, 0, 14336, 3, 0, 14352, 3, 0, 14352, 3, 0, 18944, 1040, 0, 18944, 1040, 0, 20416, 16384, 0, 20432, 16384, 0, 21312, 32, 0, 21328, 32, 0, 22848, 16384, 0, 22864, 16384, 0, 
23488, 34952, 0, 23488, 34952, 0, 23488, 34952, 0, 23488, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756573833333028868_7_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756573833333028868_7_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..79d5647c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756573833333028868_7_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,138 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2560, 1024, 0, 2880, 16388, 0, 2880, 16388, 0, 3328, 34952, 0, 3328, 34952, 0, 3328, 34952, 0, 3328, 34952, 0, 576, 17, 0, 576, 17, 0, 2560, 1024, 0, 2880, 16388, 0, 2880, 16388, 0, 3328, 34952, 0, 3328, 34952, 
0, 3328, 34952, 0, 3328, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756573833487935084_8_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756573833487935084_8_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..09785fcc --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756573833487935084_8_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,261 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (((109 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((119 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((126 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((133 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((152 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 402 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5056, 17, 0, 5056, 17, 0, 6980, 8194, 0, 6980, 8194, 0, 6984, 8194, 0, 6984, 8194, 0, 6988, 8194, 0, 6988, 8194, 0, 6996, 8194, 0, 6996, 8194, 0, 7000, 8194, 0, 7000, 8194, 0, 7004, 8194, 0, 7004, 8194, 0, 8068, 16, 0, 8072, 16, 0, 8076, 16, 0, 8084, 16, 0, 8088, 16, 0, 8092, 16, 0, 9732, 4128, 0, 9732, 4128, 0, 9736, 4128, 0, 9736, 4128, 0, 9740, 4128, 0, 9740, 4128, 0, 9748, 4128, 0, 9748, 4128, 0, 9752, 4128, 0, 9752, 4128, 0, 9756, 4128, 0, 9756, 4128, 0, 10432, 3, 0, 10432, 3, 0, 10448, 3, 0, 10448, 3, 0, 12608, 8224, 0, 12608, 8224, 0, 12624, 8224, 0, 12624, 8224, 0, 12640, 8224, 0, 12640, 8224, 0, 15744, 8464, 0, 15744, 8464, 0, 15744, 8464, 0, 15760, 8464, 0, 15760, 8464, 0, 15760, 8464, 0, 15776, 8464, 0, 15776, 8464, 0, 15776, 8464, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 5056, 17, 0, 5056, 17, 0, 6980, 8194, 0, 6980, 8194, 0, 6984, 8194, 0, 6984, 8194, 0, 6988, 8194, 0, 6988, 8194, 0, 6996, 8194, 0, 6996, 8194, 0, 7000, 8194, 0, 
7000, 8194, 0, 7004, 8194, 0, 7004, 8194, 0, 8068, 16, 0, 8072, 16, 0, 8076, 16, 0, 8084, 16, 0, 8088, 16, 0, 8092, 16, 0, 9732, 4128, 0, 9732, 4128, 0, 9736, 4128, 0, 9736, 4128, 0, 9740, 4128, 0, 9740, 4128, 0, 9748, 4128, 0, 9748, 4128, 0, 9752, 4128, 0, 9752, 4128, 0, 9756, 4128, 0, 9756, 4128, 0, 10432, 3, 0, 10432, 3, 0, 10448, 3, 0, 10448, 3, 0, 12608, 8224, 0, 12608, 8224, 0, 12624, 8224, 0, 12624, 8224, 0, 12640, 8224, 0, 12640, 8224, 0, 15744, 8464, 0, 15744, 8464, 0, 15744, 8464, 0, 15760, 8464, 0, 15760, 8464, 0, 15760, 8464, 0, 15776, 8464, 0, 15776, 8464, 0, 15776, 8464, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0, 16384, 65535, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756573899161222878_9_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756573899161222878_9_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d06bacd7 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756573899161222878_9_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,218 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((68 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 9)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4368, 16, 0, 4372, 16, 0, 4376, 16, 0, 4384, 16, 0, 4388, 16, 0, 4392, 16, 0, 5248, 17476, 0, 5248, 17476, 0, 5248, 17476, 0, 5248, 17476, 0, 5696, 34952, 0, 5696, 34952, 0, 5696, 34952, 0, 5696, 34952, 0, 7232, 4, 0, 4368, 16, 0, 4372, 16, 0, 4376, 16, 0, 4384, 16, 0, 4388, 16, 0, 4392, 16, 0, 5248, 17476, 0, 5248, 17476, 0, 5248, 17476, 0, 5248, 17476, 0, 5696, 34952, 0, 5696, 34952, 0, 5696, 34952, 0, 5696, 34952, 0, 7232, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756573900741693426_10_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756573900741693426_10_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3d696902 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756573900741693426_10_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,260 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) 
== 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((88 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((97 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 372 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 16930, 0, 1600, 16930, 0, 1600, 16930, 0, 
1600, 16930, 0, 1344, 64, 0, 2944, 17, 0, 2944, 17, 0, 5636, 17476, 0, 5636, 17476, 0, 5636, 17476, 0, 5636, 17476, 0, 5640, 17476, 0, 5640, 17476, 0, 5640, 17476, 0, 5640, 17476, 0, 5652, 17476, 0, 5652, 17476, 0, 5652, 17476, 0, 5652, 17476, 0, 5656, 17476, 0, 5656, 17476, 0, 5656, 17476, 0, 5656, 17476, 0, 6212, 17476, 0, 6212, 17476, 0, 6212, 17476, 0, 6212, 17476, 0, 6216, 17476, 0, 6216, 17476, 0, 6216, 17476, 0, 6216, 17476, 0, 6228, 17476, 0, 6228, 17476, 0, 6228, 17476, 0, 6228, 17476, 0, 6232, 17476, 0, 6232, 17476, 0, 6232, 17476, 0, 6232, 17476, 0, 6656, 1024, 0, 6672, 1024, 0, 8000, 52224, 0, 8000, 52224, 0, 8000, 52224, 0, 8000, 52224, 0, 8016, 52224, 0, 8016, 52224, 0, 8016, 52224, 0, 8016, 52224, 0, 8704, 32768, 0, 8720, 32768, 0, 11200, 32768, 0, 11904, 32768, 0, 13504, 1, 0, 14976, 512, 0, 15552, 1040, 0, 15552, 1040, 0, 15872, 18724, 0, 15872, 18724, 0, 15872, 18724, 0, 15872, 18724, 0, 15872, 18724, 0, 1600, 16930, 0, 1600, 16930, 0, 1600, 16930, 0, 1600, 16930, 0, 1344, 64, 0, 2944, 17, 0, 2944, 17, 0, 5636, 17476, 0, 5636, 17476, 0, 5636, 17476, 0, 5636, 17476, 0, 5640, 17476, 0, 5640, 17476, 0, 5640, 17476, 0, 5640, 17476, 0, 5652, 17476, 0, 5652, 17476, 0, 5652, 17476, 0, 5652, 17476, 0, 5656, 17476, 0, 5656, 17476, 0, 5656, 17476, 0, 5656, 17476, 0, 6212, 17476, 0, 6212, 17476, 0, 6212, 17476, 0, 6212, 17476, 0, 6216, 17476, 0, 6216, 17476, 0, 6216, 17476, 0, 6216, 17476, 0, 6228, 17476, 0, 6228, 17476, 0, 6228, 17476, 0, 6228, 17476, 0, 6232, 17476, 0, 6232, 17476, 0, 6232, 17476, 0, 6232, 17476, 0, 6656, 1024, 0, 6672, 1024, 0, 8000, 52224, 0, 8000, 52224, 0, 8000, 52224, 0, 8000, 52224, 0, 8016, 52224, 0, 8016, 52224, 0, 8016, 52224, 0, 8016, 52224, 0, 8704, 32768, 0, 8720, 32768, 0, 11200, 32768, 0, 11904, 32768, 0, 13504, 1, 0, 14976, 512, 0, 15552, 1040, 0, 15552, 1040, 0, 15872, 18724, 0, 15872, 18724, 0, 15872, 18724, 0, 15872, 18724, 0, 15872, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - 
Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756573910697240723_11_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756573910697240723_11_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7d1f702c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756573910697240723_11_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,140 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 11)) { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 5, 0, 1792, 5, 0, 1808, 5, 0, 1808, 5, 0, 1824, 5, 0, 1824, 5, 0, 2624, 16405, 0, 2624, 16405, 0, 2624, 16405, 0, 2624, 16405, 0, 2640, 16405, 0, 2640, 16405, 0, 2640, 16405, 0, 2640, 16405, 0, 2656, 16405, 0, 2656, 16405, 0, 2656, 16405, 0, 2656, 16405, 0, 5888, 40960, 0, 5888, 40960, 0, 6464, 40960, 0, 6464, 40960, 0, 1792, 5, 0, 1792, 5, 0, 1808, 5, 0, 1808, 5, 0, 1824, 5, 0, 1824, 5, 0, 2624, 16405, 0, 2624, 16405, 0, 2624, 16405, 0, 2624, 16405, 0, 2640, 16405, 0, 2640, 16405, 0, 2640, 16405, 0, 2640, 16405, 0, 2656, 16405, 0, 2656, 16405, 0, 2656, 16405, 0, 2656, 16405, 0, 5888, 40960, 0, 5888, 40960, 0, 6464, 40960, 0, 6464, 40960, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756573911301770085_12_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756573911301770085_12_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cd19775c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756573911301770085_12_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,229 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
case 1: { + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute 
+ Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2112, 17408, 0, 2112, 17408, 0, 2128, 17408, 0, 2128, 17408, 0, 2144, 17408, 0, 2144, 17408, 0, 3008, 17476, 0, 3008, 17476, 0, 3008, 17476, 0, 3008, 17476, 0, 3024, 17476, 0, 3024, 17476, 0, 3024, 17476, 0, 3024, 17476, 0, 3040, 17476, 0, 3040, 17476, 0, 3040, 17476, 0, 3040, 17476, 0, 5312, 16384, 0, 5328, 16384, 0, 5344, 16384, 0, 6144, 8, 0, 7808, 128, 0, 8704, 2048, 0, 576, 17, 0, 576, 17, 0, 2112, 17408, 0, 2112, 17408, 0, 2128, 17408, 0, 2128, 17408, 0, 2144, 17408, 0, 2144, 17408, 0, 3008, 17476, 0, 3008, 17476, 0, 3008, 17476, 0, 3008, 17476, 0, 3024, 17476, 0, 3024, 17476, 0, 3024, 17476, 0, 3024, 17476, 0, 3040, 17476, 0, 3040, 17476, 0, 3040, 17476, 0, 3040, 17476, 0, 5312, 16384, 0, 5328, 16384, 0, 5344, 16384, 0, 6144, 8, 0, 7808, 128, 0, 8704, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756573912115331840_13_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756573912115331840_13_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e79ba542 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756573912115331840_13_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,493 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if 
((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || 
(WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 6))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((212 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((278 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((285 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((314 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((345 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((366 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((383 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((397 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((415 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((426 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((435 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter4 == 1)) { + break; + } + } + } + case 1: { + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (448 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((471 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((481 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((488 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (495 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 
3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (504 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((519 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((550 << 6) | (i6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((573 << 6) | (i6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((580 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() < 1)) { + result 
= (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (590 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 324 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5952, 16, 0, 13584, 32768, 0, 13600, 32768, 0, 17808, 2, 0, 17824, 2, 0, 18256, 2, 0, 18272, 2, 0, 23440, 4, 0, 23456, 4, 0, 24528, 4096, 0, 28672, 32768, 0, 30144, 33792, 0, 30144, 33792, 0, 30160, 33792, 0, 30160, 33792, 0, 31680, 61440, 0, 31680, 61440, 0, 31680, 61440, 0, 31680, 61440, 0, 32256, 15, 0, 32256, 15, 0, 32256, 15, 0, 32256, 15, 0, 35200, 28, 0, 35200, 28, 0, 35200, 28, 0, 35204, 28, 0, 35204, 28, 0, 35204, 28, 0, 35216, 28, 0, 35216, 28, 0, 35216, 28, 0, 35220, 28, 0, 35220, 28, 0, 35220, 28, 0, 35232, 28, 0, 35232, 28, 0, 35232, 28, 0, 35236, 28, 0, 35236, 28, 0, 35236, 28, 0, 36672, 20, 0, 36672, 20, 0, 36676, 20, 0, 36676, 20, 0, 36688, 20, 0, 36688, 20, 0, 36692, 20, 0, 36692, 20, 0, 36704, 20, 0, 36704, 20, 0, 36708, 20, 0, 36708, 20, 0, 37760, 1, 0, 5952, 16, 0, 13584, 32768, 0, 13600, 32768, 0, 17808, 2, 0, 17824, 2, 0, 18256, 2, 0, 18272, 2, 0, 23440, 4, 0, 23456, 4, 0, 24528, 4096, 0, 28672, 32768, 0, 30144, 33792, 0, 30144, 33792, 0, 30160, 33792, 0, 30160, 33792, 0, 31680, 61440, 0, 31680, 61440, 0, 31680, 61440, 0, 31680, 61440, 0, 32256, 15, 0, 32256, 15, 0, 32256, 15, 0, 32256, 15, 0, 35200, 28, 0, 35200, 28, 0, 35200, 28, 0, 35204, 28, 0, 35204, 28, 0, 35204, 28, 0, 35216, 28, 0, 35216, 28, 0, 35216, 28, 0, 35220, 28, 0, 35220, 28, 0, 35220, 28, 0, 35232, 28, 0, 35232, 28, 0, 35232, 28, 0, 35236, 28, 0, 35236, 28, 0, 35236, 28, 0, 36672, 20, 0, 36672, 20, 0, 36676, 
20, 0, 36676, 20, 0, 36688, 20, 0, 36688, 20, 0, 36692, 20, 0, 36692, 20, 0, 36704, 20, 0, 36704, 20, 0, 36708, 20, 0, 36708, 20, 0, 37760, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756573948879868792_14_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756573948879868792_14_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ddb9e52e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756573948879868792_14_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,223 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + 
result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 11)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 
6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((216 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 8720, 2, 0, 8736, 2, 0, 10176, 514, 0, 10176, 514, 0, 10496, 17476, 0, 10496, 17476, 0, 10496, 17476, 0, 10496, 17476, 0, 576, 17, 0, 576, 17, 0, 8720, 2, 0, 8736, 2, 0, 10176, 514, 0, 10176, 514, 0, 10496, 17476, 0, 10496, 17476, 0, 10496, 17476, 0, 10496, 17476, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756573949086856358_15_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756573949086856358_15_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b0b91691 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756573949086856358_15_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,82 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + if ((WaveGetLaneIndex() >= 9)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756573949167046128_16_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756573949167046128_16_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..db9ca0f6 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756573949167046128_16_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,146 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } 
+ case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 2))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2112, 32768, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2112, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756573949307652099_17_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756573949307652099_17_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fdb3c5ef --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756573949307652099_17_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,143 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3712, 576, 0, 3712, 576, 0, 3456, 8226, 0, 3456, 8226, 0, 3456, 8226, 0, 2688, 21781, 0, 2688, 21781, 0, 2688, 21781, 0, 2688, 21781, 0, 2688, 21781, 0, 2688, 21781, 0, 
2688, 21781, 0, 4352, 73, 0, 4352, 73, 0, 4352, 73, 0, 4928, 1040, 0, 4928, 1040, 0, 5248, 28086, 0, 5248, 28086, 0, 5248, 28086, 0, 5248, 28086, 0, 5248, 28086, 0, 5248, 28086, 0, 5248, 28086, 0, 5248, 28086, 0, 5248, 28086, 0, 5248, 28086, 0, 3712, 576, 0, 3712, 576, 0, 3456, 8226, 0, 3456, 8226, 0, 3456, 8226, 0, 2688, 21781, 0, 2688, 21781, 0, 2688, 21781, 0, 2688, 21781, 0, 2688, 21781, 0, 2688, 21781, 0, 2688, 21781, 0, 4352, 73, 0, 4352, 73, 0, 4352, 73, 0, 4928, 1040, 0, 4928, 1040, 0, 5248, 28086, 0, 5248, 28086, 0, 5248, 28086, 0, 5248, 28086, 0, 5248, 28086, 0, 5248, 28086, 0, 5248, 28086, 0, 5248, 28086, 0, 5248, 28086, 0, 5248, 28086, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756573976865229915_21_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756573976865229915_21_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..52a05c91 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756573976865229915_21_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,122 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2320, 32768, 0, 4688, 32768, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2320, 32768, 0, 4688, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756573980525659837_23_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756573980525659837_23_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d6d52162 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756573980525659837_23_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,272 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((159 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((169 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((178 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((191 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 420 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4224, 4097, 0, 4224, 4097, 0, 5312, 16, 0, 5328, 16, 0, 5344, 16, 0, 5888, 16, 0, 5904, 16, 0, 5920, 16, 0, 6400, 256, 0, 7296, 17476, 0, 7296, 17476, 0, 7296, 17476, 0, 7296, 17476, 0, 9024, 34824, 0, 9024, 34824, 0, 9024, 34824, 0, 9040, 34824, 0, 9040, 34824, 0, 9040, 34824, 0, 9056, 34824, 0, 9056, 34824, 0, 
9056, 34824, 0, 10180, 34816, 0, 10180, 34816, 0, 10184, 34816, 0, 10184, 34816, 0, 10188, 34816, 0, 10188, 34816, 0, 10196, 34816, 0, 10196, 34816, 0, 10200, 34816, 0, 10200, 34816, 0, 10204, 34816, 0, 10204, 34816, 0, 10212, 34816, 0, 10212, 34816, 0, 10216, 34816, 0, 10216, 34816, 0, 10220, 34816, 0, 10220, 34816, 0, 12228, 34824, 0, 12228, 34824, 0, 12228, 34824, 0, 12232, 34824, 0, 12232, 34824, 0, 12232, 34824, 0, 12236, 34824, 0, 12236, 34824, 0, 12236, 34824, 0, 12244, 34824, 0, 12244, 34824, 0, 12244, 34824, 0, 12248, 34824, 0, 12248, 34824, 0, 12248, 34824, 0, 12252, 34824, 0, 12252, 34824, 0, 12252, 34824, 0, 12260, 34824, 0, 12260, 34824, 0, 12260, 34824, 0, 12264, 34824, 0, 12264, 34824, 0, 12264, 34824, 0, 12268, 34824, 0, 12268, 34824, 0, 12268, 34824, 0, 13440, 128, 0, 13456, 128, 0, 13472, 128, 0, 4224, 4097, 0, 4224, 4097, 0, 5312, 16, 0, 5328, 16, 0, 5344, 16, 0, 5888, 16, 0, 5904, 16, 0, 5920, 16, 0, 6400, 256, 0, 7296, 17476, 0, 7296, 17476, 0, 7296, 17476, 0, 7296, 17476, 0, 9024, 34824, 0, 9024, 34824, 0, 9024, 34824, 0, 9040, 34824, 0, 9040, 34824, 0, 9040, 34824, 0, 9056, 34824, 0, 9056, 34824, 0, 9056, 34824, 0, 10180, 34816, 0, 10180, 34816, 0, 10184, 34816, 0, 10184, 34816, 0, 10188, 34816, 0, 10188, 34816, 0, 10196, 34816, 0, 10196, 34816, 0, 10200, 34816, 0, 10200, 34816, 0, 10204, 34816, 0, 10204, 34816, 0, 10212, 34816, 0, 10212, 34816, 0, 10216, 34816, 0, 10216, 34816, 0, 10220, 34816, 0, 10220, 34816, 0, 12228, 34824, 0, 12228, 34824, 0, 12228, 34824, 0, 12232, 34824, 0, 12232, 34824, 0, 12232, 34824, 0, 12236, 34824, 0, 12236, 34824, 0, 12236, 34824, 0, 12244, 34824, 0, 12244, 34824, 0, 12244, 34824, 0, 12248, 34824, 0, 12248, 34824, 0, 12248, 34824, 0, 12252, 34824, 0, 12252, 34824, 0, 12252, 34824, 0, 12260, 34824, 0, 12260, 34824, 0, 12260, 34824, 0, 12264, 34824, 0, 12264, 34824, 0, 12264, 34824, 0, 12268, 34824, 0, 12268, 34824, 0, 12268, 34824, 0, 13440, 128, 0, 13456, 128, 0, 13472, 128, 0] + - Name: _wave_op_index + 
Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756573983644687589_24_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756573983644687589_24_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..efd44f12 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756573983644687589_24_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,105 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756573983766363717_25_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756573983766363717_25_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5cdc125f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756573983766363717_25_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,269 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 4))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((191 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 73, 0, 768, 73, 0, 768, 73, 0, 5632, 1040, 0, 5632, 1040, 0, 5952, 18724, 0, 5952, 18724, 0, 5952, 18724, 0, 5952, 18724, 0, 5952, 18724, 0, 6848, 73, 0, 6848, 73, 0, 6848, 73, 0, 7424, 5201, 0, 7424, 5201, 0, 7424, 5201, 0, 7424, 5201, 0, 7424, 5201, 0, 7744, 18724, 0, 7744, 18724, 0, 7744, 18724, 0, 7744, 18724, 0, 7744, 18724, 0, 8384, 17, 0, 8384, 17, 0, 12992, 17476, 0, 12992, 17476, 0, 12992, 17476, 0, 12992, 17476, 0, 13440, 34952, 0, 13440, 34952, 0, 13440, 
34952, 0, 13440, 34952, 0, 768, 73, 0, 768, 73, 0, 768, 73, 0, 5632, 1040, 0, 5632, 1040, 0, 5952, 18724, 0, 5952, 18724, 0, 5952, 18724, 0, 5952, 18724, 0, 5952, 18724, 0, 6848, 73, 0, 6848, 73, 0, 6848, 73, 0, 7424, 5201, 0, 7424, 5201, 0, 7424, 5201, 0, 7424, 5201, 0, 7424, 5201, 0, 7744, 18724, 0, 7744, 18724, 0, 7744, 18724, 0, 7744, 18724, 0, 7744, 18724, 0, 8384, 17, 0, 8384, 17, 0, 12992, 17476, 0, 12992, 17476, 0, 12992, 17476, 0, 12992, 17476, 0, 13440, 34952, 0, 13440, 34952, 0, 13440, 34952, 0, 13440, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574039656046668_27_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574039656046668_27_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0a1a62f3 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574039656046668_27_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,159 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((98 << 6) | (counter0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((105 << 6) | (counter0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 11)) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 7184, 2048, 0, 7200, 2048, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 7184, 2048, 0, 7200, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574048657279555_29_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574048657279555_29_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..339187e0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574048657279555_29_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,251 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 11)) { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 3) || 
(WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((176 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10))) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 4624, 2, 0, 4640, 2, 0, 4656, 2, 0, 5328, 8194, 0, 5328, 8194, 0, 5344, 8194, 0, 5344, 8194, 0, 5360, 8194, 0, 5360, 8194, 0, 7952, 512, 0, 7968, 512, 0, 7984, 512, 0, 8256, 17476, 0, 8256, 17476, 0, 8256, 17476, 0, 8256, 17476, 0, 12096, 85, 0, 12096, 85, 0, 12096, 85, 0, 12096, 85, 0, 768, 1, 0, 4624, 2, 0, 4640, 2, 0, 4656, 2, 0, 5328, 8194, 0, 5328, 8194, 0, 5344, 8194, 0, 5344, 8194, 0, 5360, 8194, 0, 5360, 8194, 0, 7952, 512, 0, 7968, 512, 0, 7984, 512, 0, 8256, 17476, 0, 8256, 17476, 0, 8256, 17476, 0, 8256, 17476, 0, 12096, 85, 0, 12096, 85, 0, 12096, 85, 0, 12096, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: 
+ - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574057488860083_32_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574057488860083_32_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87cd25f4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574057488860083_32_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 
1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574057603956571_33_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574057603956571_33_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bcce3e3a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574057603956571_33_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,271 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 3))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 2))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((264 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14))) { + if 
((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((288 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((303 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((314 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4160, 16388, 0, 4160, 16388, 0, 4176, 16388, 0, 4176, 16388, 0, 4192, 16388, 0, 4192, 16388, 0, 5376, 1024, 0, 5392, 1024, 0, 5408, 1024, 0, 15552, 16644, 0, 15552, 16644, 0, 15552, 16644, 0, 16896, 40962, 0, 16896, 40962, 0, 16896, 40962, 0, 16912, 40962, 0, 16912, 40962, 0, 16912, 40962, 0, 19392, 2048, 0, 19408, 2048, 0, 20096, 43018, 0, 20096, 43018, 0, 20096, 43018, 0, 20096, 43018, 0, 20096, 43018, 0, 20112, 43018, 0, 20112, 43018, 0, 20112, 43018, 0, 20112, 43018, 0, 20112, 43018, 0, 576, 
17, 0, 576, 17, 0, 4160, 16388, 0, 4160, 16388, 0, 4176, 16388, 0, 4176, 16388, 0, 4192, 16388, 0, 4192, 16388, 0, 5376, 1024, 0, 5392, 1024, 0, 5408, 1024, 0, 15552, 16644, 0, 15552, 16644, 0, 15552, 16644, 0, 16896, 40962, 0, 16896, 40962, 0, 16896, 40962, 0, 16912, 40962, 0, 16912, 40962, 0, 16912, 40962, 0, 19392, 2048, 0, 19408, 2048, 0, 20096, 43018, 0, 20096, 43018, 0, 20096, 43018, 0, 20096, 43018, 0, 20096, 43018, 0, 20112, 43018, 0, 20112, 43018, 0, 20112, 43018, 0, 20112, 43018, 0, 20112, 43018, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574068823500784_35_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574068823500784_35_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0158ce94 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574068823500784_35_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,166 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [8384, 33288, 0, 8384, 33288, 0, 8384, 33288, 0, 8960, 1040, 0, 8960, 1040, 0, 9600, 18724, 0, 9600, 18724, 0, 9600, 18724, 0, 9600, 18724, 0, 9600, 18724, 0, 8384, 33288, 0, 8384, 33288, 0, 8384, 33288, 0, 8960, 1040, 0, 8960, 1040, 0, 9600, 18724, 0, 9600, 18724, 0, 9600, 18724, 0, 9600, 18724, 0, 9600, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574070244200267_37_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574070244200267_37_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1f24f542 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574070244200267_37_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,108 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 696 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1680, 43690, 0, 1680, 43690, 0, 1680, 43690, 0, 1680, 43690, 0, 1680, 43690, 0, 1680, 43690, 0, 1680, 43690, 0, 1680, 43690, 0, 2752, 43690, 0, 2752, 43690, 0, 2752, 43690, 0, 2752, 43690, 0, 2752, 43690, 0, 2752, 43690, 0, 2752, 43690, 0, 2752, 43690, 0, 2756, 43690, 0, 2756, 43690, 0, 2756, 43690, 0, 2756, 43690, 0, 2756, 43690, 0, 2756, 43690, 0, 2756, 43690, 0, 2756, 43690, 0, 2760, 43690, 0, 2760, 43690, 0, 2760, 43690, 0, 2760, 43690, 0, 2760, 43690, 0, 2760, 43690, 0, 2760, 43690, 0, 2760, 43690, 0, 2768, 43690, 0, 2768, 43690, 0, 2768, 43690, 0, 2768, 43690, 0, 2768, 43690, 0, 2768, 43690, 0, 2768, 43690, 0, 2768, 43690, 0, 2772, 43690, 0, 2772, 43690, 0, 2772, 43690, 0, 2772, 43690, 0, 2772, 43690, 0, 2772, 43690, 0, 2772, 43690, 0, 2772, 43690, 0, 2776, 43690, 0, 2776, 43690, 0, 2776, 43690, 0, 2776, 43690, 0, 2776, 43690, 0, 2776, 43690, 0, 2776, 43690, 0, 2776, 43690, 0, 3328, 43690, 0, 3328, 43690, 0, 3328, 43690, 0, 3328, 43690, 0, 3328, 43690, 0, 3328, 43690, 0, 3328, 43690, 0, 3328, 43690, 0, 3332, 43690, 0, 3332, 43690, 0, 3332, 43690, 0, 3332, 43690, 0, 3332, 43690, 0, 3332, 43690, 0, 3332, 43690, 0, 3332, 43690, 0, 3336, 43690, 0, 3336, 43690, 0, 3336, 43690, 0, 3336, 43690, 0, 3336, 
43690, 0, 3336, 43690, 0, 3336, 43690, 0, 3336, 43690, 0, 3344, 43690, 0, 3344, 43690, 0, 3344, 43690, 0, 3344, 43690, 0, 3344, 43690, 0, 3344, 43690, 0, 3344, 43690, 0, 3344, 43690, 0, 3348, 43690, 0, 3348, 43690, 0, 3348, 43690, 0, 3348, 43690, 0, 3348, 43690, 0, 3348, 43690, 0, 3348, 43690, 0, 3348, 43690, 0, 3352, 43690, 0, 3352, 43690, 0, 3352, 43690, 0, 3352, 43690, 0, 3352, 43690, 0, 3352, 43690, 0, 3352, 43690, 0, 3352, 43690, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1680, 43690, 0, 1680, 43690, 0, 1680, 43690, 0, 1680, 43690, 0, 1680, 43690, 0, 1680, 43690, 0, 1680, 43690, 0, 1680, 43690, 0, 2752, 43690, 0, 2752, 43690, 0, 2752, 43690, 0, 2752, 43690, 0, 2752, 43690, 0, 2752, 43690, 0, 2752, 43690, 0, 2752, 43690, 0, 2756, 43690, 0, 2756, 43690, 0, 2756, 43690, 0, 2756, 43690, 0, 2756, 43690, 0, 2756, 43690, 0, 2756, 43690, 0, 2756, 43690, 0, 2760, 43690, 0, 2760, 43690, 0, 2760, 43690, 0, 2760, 43690, 0, 2760, 43690, 0, 2760, 43690, 0, 2760, 43690, 0, 2760, 43690, 0, 2768, 43690, 0, 2768, 43690, 0, 2768, 43690, 0, 2768, 43690, 0, 2768, 43690, 0, 2768, 43690, 0, 2768, 43690, 0, 2768, 43690, 0, 2772, 43690, 0, 2772, 43690, 0, 2772, 43690, 0, 2772, 43690, 0, 2772, 43690, 0, 2772, 43690, 0, 2772, 43690, 0, 2772, 43690, 0, 2776, 43690, 0, 2776, 43690, 0, 2776, 43690, 0, 2776, 43690, 0, 2776, 43690, 0, 2776, 43690, 0, 2776, 43690, 0, 2776, 43690, 0, 3328, 43690, 0, 3328, 43690, 0, 3328, 43690, 0, 3328, 43690, 0, 3328, 43690, 0, 3328, 43690, 0, 3328, 43690, 0, 3328, 43690, 0, 3332, 43690, 0, 3332, 43690, 0, 3332, 43690, 0, 3332, 43690, 0, 3332, 43690, 0, 3332, 43690, 0, 3332, 43690, 0, 3332, 43690, 0, 3336, 43690, 0, 3336, 43690, 0, 3336, 43690, 0, 3336, 43690, 0, 3336, 43690, 0, 3336, 43690, 0, 3336, 43690, 0, 3336, 43690, 0, 3344, 43690, 0, 3344, 43690, 0, 3344, 43690, 0, 3344, 43690, 0, 3344, 43690, 0, 3344, 43690, 0, 3344, 
43690, 0, 3344, 43690, 0, 3348, 43690, 0, 3348, 43690, 0, 3348, 43690, 0, 3348, 43690, 0, 3348, 43690, 0, 3348, 43690, 0, 3348, 43690, 0, 3348, 43690, 0, 3352, 43690, 0, 3352, 43690, 0, 3352, 43690, 0, 3352, 43690, 0, 3352, 43690, 0, 3352, 43690, 0, 3352, 43690, 0, 3352, 43690, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574071274812571_38_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574071274812571_38_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..79bb2588 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574071274812571_38_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,320 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 
0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } else { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + 
case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((274 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4304, 20481, 0, 4304, 20481, 0, 4304, 20481, 0, 4320, 
20481, 0, 4320, 20481, 0, 4320, 20481, 0, 4336, 20481, 0, 4336, 20481, 0, 4336, 20481, 0, 5136, 20481, 0, 5136, 20481, 0, 5136, 20481, 0, 5152, 20481, 0, 5152, 20481, 0, 5152, 20481, 0, 5168, 20481, 0, 5168, 20481, 0, 5168, 20481, 0, 6464, 20480, 0, 6464, 20480, 0, 16832, 32, 0, 16848, 32, 0, 16864, 32, 0, 4304, 20481, 0, 4304, 20481, 0, 4304, 20481, 0, 4320, 20481, 0, 4320, 20481, 0, 4320, 20481, 0, 4336, 20481, 0, 4336, 20481, 0, 4336, 20481, 0, 5136, 20481, 0, 5136, 20481, 0, 5136, 20481, 0, 5152, 20481, 0, 5152, 20481, 0, 5152, 20481, 0, 5168, 20481, 0, 5168, 20481, 0, 5168, 20481, 0, 6464, 20480, 0, 6464, 20480, 0, 16832, 32, 0, 16848, 32, 0, 16864, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574076602719791_39_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574076602719791_39_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c57b99d9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574076602719791_39_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,145 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- 
+Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3008, 49152, 0, 3008, 49152, 0, 2624, 276, 0, 2624, 276, 0, 2624, 276, 0, 2368, 5185, 0, 2368, 5185, 0, 2368, 5185, 0, 2368, 5185, 0, 6912, 4112, 0, 6912, 4112, 0, 6528, 43690, 0, 6528, 43690, 0, 6528, 43690, 0, 6528, 43690, 0, 6528, 43690, 0, 6528, 43690, 0, 6528, 43690, 0, 6528, 43690, 0, 5504, 16384, 0, 3008, 49152, 0, 3008, 49152, 0, 2624, 276, 0, 2624, 276, 0, 2624, 276, 0, 2368, 5185, 0, 2368, 5185, 0, 2368, 5185, 0, 2368, 5185, 0, 6912, 4112, 0, 6912, 4112, 0, 6528, 43690, 0, 6528, 43690, 0, 6528, 43690, 0, 6528, 43690, 0, 6528, 43690, 0, 6528, 43690, 0, 6528, 43690, 0, 6528, 43690, 0, 5504, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574076890050388_40_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574076890050388_40_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..545c3014 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574076890050388_40_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,240 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((116 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + 
} + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 2944, 32776, 0, 2944, 32776, 0, 3648, 32768, 0, 6608, 4097, 0, 6608, 4097, 0, 6612, 4097, 0, 6612, 4097, 0, 7440, 4097, 0, 7440, 4097, 0, 7444, 4097, 0, 7444, 4097, 0, 8848, 256, 0, 9920, 17476, 0, 9920, 17476, 0, 9920, 17476, 0, 9920, 17476, 0, 10560, 8, 0, 11456, 128, 0, 11776, 2048, 0, 576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 2944, 32776, 0, 2944, 32776, 0, 3648, 32768, 0, 6608, 4097, 0, 6608, 4097, 0, 6612, 4097, 0, 6612, 4097, 0, 7440, 4097, 0, 7440, 4097, 0, 7444, 4097, 0, 7444, 4097, 0, 8848, 256, 0, 9920, 17476, 0, 9920, 17476, 0, 9920, 17476, 0, 9920, 17476, 0, 10560, 8, 0, 11456, 128, 0, 11776, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574078028458376_41_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574078028458376_41_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f703cae8 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574078028458376_41_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,302 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 11))) 
{ + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((138 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((149 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (i3 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((241 << 6) | (i3 << 4)) | (i4 << 2)) | counter5); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((248 << 6) | (i3 << 4)) | (i4 << 2)) | counter5); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((267 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((278 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((293 << 6) | (i3 << 4)) 
| (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((329 << 6) | (i3 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((338 << 6) | (i3 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((345 << 6) | (i3 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((356 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 588 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 8738, 0, 1472, 8738, 0, 1472, 8738, 0, 1472, 8738, 0, 2048, 8738, 0, 2048, 8738, 0, 2048, 8738, 0, 2048, 8738, 0, 7616, 64, 0, 7632, 64, 0, 10752, 16388, 0, 10752, 16388, 0, 10768, 16388, 0, 10768, 16388, 0, 11392, 34952, 0, 11392, 34952, 0, 11392, 34952, 0, 11392, 34952, 0, 12352, 32, 0, 12368, 32, 0, 14016, 2048, 0, 14032, 2048, 0, 15873, 1, 0, 15874, 1, 0, 15875, 1, 0, 15877, 1, 0, 15878, 1, 0, 15879, 1, 0, 15881, 1, 0, 15882, 1, 0, 15883, 1, 0, 15889, 1, 0, 15890, 1, 0, 15891, 1, 0, 15893, 1, 0, 15894, 1, 0, 15895, 1, 0, 15897, 1, 0, 15898, 1, 0, 15899, 1, 0, 17792, 61454, 0, 17792, 61454, 0, 17792, 61454, 0, 17792, 61454, 0, 17792, 61454, 0, 17792, 61454, 0, 17792, 61454, 0, 17808, 61454, 0, 17808, 61454, 0, 17808, 61454, 0, 17808, 61454, 0, 17808, 61454, 0, 17808, 61454, 0, 17808, 61454, 0, 21056, 512, 0, 21060, 512, 0, 21072, 512, 0, 21076, 512, 0, 21632, 41002, 0, 21632, 41002, 0, 21632, 41002, 0, 21632, 41002, 0, 21632, 41002, 0, 21636, 41002, 0, 21636, 41002, 0, 21636, 41002, 0, 21636, 41002, 0, 21636, 41002, 0, 21648, 41002, 0, 21648, 41002, 0, 21648, 41002, 0, 21648, 41002, 0, 21648, 41002, 0, 21652, 41002, 0, 21652, 41002, 0, 21652, 41002, 0, 21652, 41002, 0, 21652, 41002, 0, 22080, 64, 0, 22084, 64, 0, 22096, 64, 0, 22100, 64, 0, 22784, 61454, 0, 22784, 61454, 0, 22784, 61454, 0, 22784, 61454, 0, 22784, 61454, 0, 22784, 61454, 0, 22784, 61454, 0, 22800, 61454, 0, 22800, 61454, 0, 22800, 61454, 0, 22800, 61454, 0, 22800, 61454, 0, 22800, 61454, 0, 22800, 61454, 0, 576, 17, 0, 576, 17, 0, 1472, 8738, 0, 1472, 8738, 0, 1472, 8738, 0, 1472, 8738, 0, 2048, 8738, 0, 2048, 8738, 0, 2048, 8738, 0, 2048, 8738, 0, 7616, 64, 0, 7632, 64, 0, 10752, 16388, 0, 10752, 16388, 0, 10768, 16388, 0, 10768, 16388, 0, 11392, 34952, 0, 11392, 34952, 0, 11392, 34952, 0, 11392, 34952, 0, 12352, 32, 0, 12368, 
32, 0, 14016, 2048, 0, 14032, 2048, 0, 15873, 1, 0, 15874, 1, 0, 15875, 1, 0, 15877, 1, 0, 15878, 1, 0, 15879, 1, 0, 15881, 1, 0, 15882, 1, 0, 15883, 1, 0, 15889, 1, 0, 15890, 1, 0, 15891, 1, 0, 15893, 1, 0, 15894, 1, 0, 15895, 1, 0, 15897, 1, 0, 15898, 1, 0, 15899, 1, 0, 17792, 61454, 0, 17792, 61454, 0, 17792, 61454, 0, 17792, 61454, 0, 17792, 61454, 0, 17792, 61454, 0, 17792, 61454, 0, 17808, 61454, 0, 17808, 61454, 0, 17808, 61454, 0, 17808, 61454, 0, 17808, 61454, 0, 17808, 61454, 0, 17808, 61454, 0, 21056, 512, 0, 21060, 512, 0, 21072, 512, 0, 21076, 512, 0, 21632, 41002, 0, 21632, 41002, 0, 21632, 41002, 0, 21632, 41002, 0, 21632, 41002, 0, 21636, 41002, 0, 21636, 41002, 0, 21636, 41002, 0, 21636, 41002, 0, 21636, 41002, 0, 21648, 41002, 0, 21648, 41002, 0, 21648, 41002, 0, 21648, 41002, 0, 21648, 41002, 0, 21652, 41002, 0, 21652, 41002, 0, 21652, 41002, 0, 21652, 41002, 0, 21652, 41002, 0, 22080, 64, 0, 22084, 64, 0, 22096, 64, 0, 22100, 64, 0, 22784, 61454, 0, 22784, 61454, 0, 22784, 61454, 0, 22784, 61454, 0, 22784, 61454, 0, 22784, 61454, 0, 22784, 61454, 0, 22800, 61454, 0, 22800, 61454, 0, 22800, 61454, 0, 22800, 61454, 0, 22800, 61454, 0, 22800, 61454, 0, 22800, 61454, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574178660588752_42_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574178660588752_42_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87039358 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574178660588752_42_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,190 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; 
(i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 7))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 73, 0, 768, 73, 0, 768, 73, 0, 5312, 1040, 0, 5312, 1040, 0, 5632, 18724, 0, 5632, 18724, 0, 5632, 18724, 0, 5632, 18724, 0, 5632, 18724, 0, 6272, 15, 0, 6272, 15, 0, 6272, 15, 0, 6272, 15, 0, 8704, 128, 0, 768, 73, 0, 768, 73, 0, 768, 73, 0, 5312, 1040, 0, 5312, 1040, 0, 5632, 18724, 0, 5632, 18724, 0, 5632, 18724, 0, 5632, 18724, 0, 5632, 18724, 0, 6272, 15, 0, 6272, 15, 0, 6272, 15, 0, 6272, 15, 0, 8704, 128, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - 
Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574178897671352_43_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574178897671352_43_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e6c9bf58 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574178897671352_43_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,142 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2240, 1, 0, 3136, 18436, 0, 3136, 18436, 0, 3136, 18436, 0, 4096, 57351, 0, 4096, 57351, 0, 4096, 57351, 0, 4096, 57351, 0, 4096, 57351, 0, 4096, 57351, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 
0, 2240, 1, 0, 3136, 18436, 0, 3136, 18436, 0, 3136, 18436, 0, 4096, 57351, 0, 4096, 57351, 0, 4096, 57351, 0, 4096, 57351, 0, 4096, 57351, 0, 4096, 57351, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574181321695490_45_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574181321695490_45_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..17242c78 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574181321695490_45_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,154 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() >= 13)) { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 2))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 16384, 0, 6784, 1, 0, 768, 16384, 0, 6784, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574181492719880_46_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574181492719880_46_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..22c45915 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574181492719880_46_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,254 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + if 
(((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } else { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((183 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((193 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((204 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((219 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 2)) { + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 10496, 32768, 0, 10512, 32768, 0, 11712, 32776, 0, 11712, 32776, 0, 11716, 32776, 0, 11716, 32776, 0, 11720, 32776, 0, 11720, 32776, 0, 11728, 32776, 0, 11728, 32776, 0, 11732, 32776, 0, 11732, 32776, 0, 11736, 32776, 0, 11736, 32776, 0, 14016, 128, 0, 14020, 128, 0, 14024, 128, 0, 14032, 128, 0, 14036, 128, 0, 14040, 128, 0, 576, 17, 0, 576, 17, 0, 10496, 32768, 0, 10512, 32768, 0, 11712, 32776, 0, 11712, 32776, 0, 11716, 32776, 0, 11716, 32776, 0, 11720, 32776, 0, 11720, 32776, 0, 11728, 32776, 0, 11728, 32776, 0, 11732, 32776, 0, 11732, 32776, 0, 11736, 32776, 0, 11736, 32776, 0, 14016, 128, 0, 14020, 128, 0, 14024, 128, 0, 14032, 128, 0, 14036, 128, 0, 14040, 128, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574182695050788_47_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574182695050788_47_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..268b5667 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574182695050788_47_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,129 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 2) || 
(WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() & 1) == 0)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 127, 0, 976, 127, 0, 976, 127, 0, 976, 127, 0, 976, 127, 0, 976, 127, 0, 976, 127, 0, 992, 127, 0, 992, 127, 0, 992, 127, 0, 992, 127, 0, 992, 127, 0, 992, 127, 0, 992, 127, 0, 1008, 127, 0, 1008, 127, 0, 
1008, 127, 0, 1008, 127, 0, 1008, 127, 0, 1008, 127, 0, 1008, 127, 0, 1424, 63488, 0, 1424, 63488, 0, 1424, 63488, 0, 1424, 63488, 0, 1424, 63488, 0, 1440, 63488, 0, 1440, 63488, 0, 1440, 63488, 0, 1440, 63488, 0, 1440, 63488, 0, 1456, 63488, 0, 1456, 63488, 0, 1456, 63488, 0, 1456, 63488, 0, 1456, 63488, 0, 3456, 1, 0, 3472, 1, 0, 4864, 1, 0, 976, 127, 0, 976, 127, 0, 976, 127, 0, 976, 127, 0, 976, 127, 0, 976, 127, 0, 976, 127, 0, 992, 127, 0, 992, 127, 0, 992, 127, 0, 992, 127, 0, 992, 127, 0, 992, 127, 0, 992, 127, 0, 1008, 127, 0, 1008, 127, 0, 1008, 127, 0, 1008, 127, 0, 1008, 127, 0, 1008, 127, 0, 1008, 127, 0, 1424, 63488, 0, 1424, 63488, 0, 1424, 63488, 0, 1424, 63488, 0, 1424, 63488, 0, 1440, 63488, 0, 1440, 63488, 0, 1440, 63488, 0, 1440, 63488, 0, 1440, 63488, 0, 1456, 63488, 0, 1456, 63488, 0, 1456, 63488, 0, 1456, 63488, 0, 1456, 63488, 0, 3456, 1, 0, 3472, 1, 0, 4864, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574184037098342_49_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574184037098342_49_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..659313f1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574184037098342_49_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,173 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 2))) { + result = (result 
+ WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 57347, 0, 1088, 57347, 0, 1088, 57347, 0, 1088, 57347, 0, 1088, 57347, 0, 10496, 16384, 0, 10944, 32768, 0, 11648, 57347, 0, 11648, 57347, 0, 11648, 57347, 0, 11648, 57347, 0, 11648, 57347, 0, 1088, 57347, 0, 1088, 57347, 0, 1088, 57347, 0, 1088, 57347, 0, 1088, 57347, 0, 10496, 16384, 0, 10944, 32768, 0, 11648, 57347, 0, 11648, 57347, 0, 11648, 57347, 0, 11648, 57347, 0, 11648, 57347, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: 
_wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574184215385351_50_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574184215385351_50_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e5fdd96c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574184215385351_50_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,321 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 3)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | 
(counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5248, 21845, 0, 5248, 21845, 0, 5248, 21845, 0, 5248, 21845, 0, 5248, 21845, 0, 5248, 21845, 0, 5248, 21845, 0, 5248, 21845, 0, 7376, 64, 0, 7392, 64, 0, 8336, 4096, 0, 8352, 4096, 0, 9728, 1024, 0, 9744, 1024, 0, 11008, 16384, 0, 12096, 8, 0, 13568, 2080, 0, 13568, 2080, 0, 15104, 32, 0, 15552, 2048, 0, 16128, 2080, 0, 16128, 2080, 0, 5248, 21845, 0, 5248, 21845, 0, 5248, 21845, 0, 5248, 21845, 0, 5248, 21845, 0, 5248, 21845, 0, 5248, 21845, 0, 5248, 21845, 0, 7376, 64, 0, 7392, 64, 0, 8336, 4096, 0, 8352, 4096, 0, 9728, 1024, 0, 9744, 1024, 0, 11008, 16384, 0, 12096, 8, 0, 13568, 2080, 0, 13568, 2080, 0, 15104, 32, 0, 15552, 2048, 0, 16128, 
2080, 0, 16128, 2080, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574185275716566_51_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574185275716566_51_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c52d9798 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574185275716566_51_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,140 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((13 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((35 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((89 << 6) | 
(counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 708 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [848, 8192, 0, 864, 8192, 0, 2260, 8464, 0, 2260, 8464, 0, 2260, 8464, 0, 2264, 8464, 0, 2264, 8464, 0, 2264, 8464, 0, 2276, 8464, 0, 2276, 8464, 0, 2276, 8464, 0, 2280, 8464, 0, 2280, 8464, 0, 2280, 8464, 0, 2900, 17, 0, 2900, 17, 0, 2904, 17, 0, 2904, 17, 0, 2916, 17, 0, 2916, 17, 0, 2920, 17, 0, 2920, 17, 0, 3796, 26214, 0, 3796, 26214, 0, 3796, 26214, 0, 3796, 26214, 0, 3796, 26214, 0, 3796, 26214, 0, 3796, 26214, 0, 3796, 26214, 0, 3800, 26214, 0, 3800, 26214, 0, 3800, 26214, 0, 3800, 26214, 0, 3800, 26214, 0, 3800, 26214, 0, 3800, 26214, 0, 3800, 26214, 0, 3812, 26214, 0, 3812, 26214, 0, 3812, 26214, 0, 3812, 26214, 0, 3812, 26214, 0, 3812, 26214, 0, 3812, 26214, 0, 3812, 26214, 0, 3816, 26214, 0, 3816, 26214, 0, 3816, 26214, 0, 3816, 26214, 0, 3816, 26214, 0, 3816, 26214, 0, 3816, 26214, 0, 3816, 26214, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 4264, 
61166, 0, 4264, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 5716, 6178, 0, 5716, 6178, 0, 5716, 6178, 0, 5716, 6178, 0, 5720, 6178, 0, 5720, 6178, 0, 5720, 6178, 0, 5720, 6178, 0, 5732, 6178, 0, 5732, 6178, 0, 5732, 6178, 0, 5732, 6178, 0, 5736, 6178, 0, 5736, 6178, 0, 5736, 6178, 0, 5736, 6178, 0, 848, 8192, 0, 864, 8192, 0, 2260, 8464, 0, 2260, 8464, 0, 2260, 8464, 0, 2264, 8464, 0, 2264, 8464, 0, 2264, 8464, 0, 2276, 8464, 0, 2276, 8464, 0, 2276, 8464, 0, 2280, 8464, 0, 2280, 8464, 0, 2280, 8464, 0, 2900, 17, 0, 2900, 17, 0, 2904, 17, 0, 2904, 17, 0, 2916, 17, 0, 2916, 17, 0, 2920, 17, 0, 2920, 17, 0, 3796, 26214, 0, 3796, 26214, 0, 3796, 26214, 0, 3796, 26214, 0, 3796, 26214, 0, 3796, 26214, 0, 3796, 26214, 0, 3796, 26214, 0, 3800, 26214, 0, 3800, 26214, 0, 3800, 26214, 0, 3800, 26214, 0, 3800, 26214, 0, 3800, 26214, 0, 3800, 26214, 0, 3800, 26214, 0, 3812, 26214, 0, 3812, 26214, 0, 3812, 26214, 0, 3812, 26214, 0, 3812, 26214, 0, 3812, 26214, 0, 3812, 26214, 0, 3812, 26214, 0, 3816, 26214, 0, 3816, 26214, 0, 3816, 26214, 0, 3816, 26214, 0, 3816, 26214, 0, 3816, 26214, 0, 3816, 26214, 0, 3816, 26214, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4244, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4248, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4260, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 4264, 61166, 0, 5716, 6178, 0, 5716, 6178, 0, 5716, 6178, 0, 5716, 6178, 0, 5720, 6178, 0, 5720, 6178, 0, 5720, 6178, 0, 
5720, 6178, 0, 5732, 6178, 0, 5732, 6178, 0, 5732, 6178, 0, 5732, 6178, 0, 5736, 6178, 0, 5736, 6178, 0, 5736, 6178, 0, 5736, 6178, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574192350814513_52_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574192350814513_52_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..62db2c87 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574192350814513_52_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,118 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 21525, 0, 1344, 21525, 0, 1344, 21525, 0, 1344, 21525, 0, 1344, 21525, 0, 1344, 21525, 0, 1360, 21525, 0, 1360, 21525, 0, 1360, 21525, 0, 1360, 21525, 0, 1360, 21525, 0, 1360, 21525, 0, 1376, 21525, 0, 1376, 21525, 0, 1376, 21525, 0, 1376, 21525, 0, 1376, 21525, 0, 1376, 21525, 0, 2304, 4, 0, 2308, 4, 0, 2320, 4, 0, 2324, 4, 0, 2336, 4, 0, 2340, 4, 0, 3008, 16385, 0, 3008, 16385, 0, 3024, 16385, 0, 3024, 16385, 
0, 3040, 16385, 0, 3040, 16385, 0, 1344, 21525, 0, 1344, 21525, 0, 1344, 21525, 0, 1344, 21525, 0, 1344, 21525, 0, 1344, 21525, 0, 1360, 21525, 0, 1360, 21525, 0, 1360, 21525, 0, 1360, 21525, 0, 1360, 21525, 0, 1360, 21525, 0, 1376, 21525, 0, 1376, 21525, 0, 1376, 21525, 0, 1376, 21525, 0, 1376, 21525, 0, 1376, 21525, 0, 2304, 4, 0, 2308, 4, 0, 2320, 4, 0, 2324, 4, 0, 2336, 4, 0, 2340, 4, 0, 3008, 16385, 0, 3008, 16385, 0, 3024, 16385, 0, 3024, 16385, 0, 3040, 16385, 0, 3040, 16385, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574193234972274_53_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574193234972274_53_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ca36fb11 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574193234972274_53_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,449 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 10))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (286 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (295 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (304 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((337 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((347 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((354 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (358 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (363 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (373 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (399 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (409 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (418 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((WaveGetLaneIndex() 
< 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (425 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 294 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2384, 21509, 0, 2384, 21509, 0, 2384, 21509, 0, 2384, 21509, 0, 2384, 21509, 0, 2388, 21509, 0, 2388, 21509, 0, 2388, 21509, 0, 2388, 21509, 0, 2388, 21509, 0, 2392, 21509, 0, 2392, 21509, 0, 2392, 21509, 0, 2392, 21509, 0, 2392, 21509, 0, 2400, 21509, 0, 2400, 21509, 0, 2400, 21509, 0, 2400, 21509, 0, 2400, 21509, 0, 2404, 21509, 0, 2404, 21509, 0, 2404, 21509, 0, 2404, 21509, 0, 2404, 21509, 0, 2408, 21509, 0, 2408, 21509, 0, 2408, 21509, 0, 2408, 21509, 0, 2408, 21509, 0, 5264, 1280, 0, 5264, 1280, 0, 5280, 1280, 0, 5280, 1280, 0, 6224, 1024, 0, 6240, 1024, 0, 7568, 4176, 0, 7568, 4176, 0, 7568, 4176, 0, 7584, 4176, 0, 7584, 4176, 0, 7584, 4176, 0, 18304, 1, 0, 18880, 16, 0, 19200, 256, 0, 23232, 17476, 0, 23232, 17476, 0, 23232, 17476, 0, 23232, 17476, 0, 2384, 21509, 0, 2384, 21509, 0, 2384, 21509, 0, 2384, 21509, 0, 2384, 21509, 0, 2388, 21509, 0, 2388, 21509, 0, 2388, 21509, 0, 2388, 21509, 0, 2388, 21509, 0, 2392, 21509, 0, 2392, 21509, 0, 2392, 21509, 0, 2392, 21509, 0, 2392, 21509, 0, 2400, 21509, 0, 2400, 21509, 0, 2400, 21509, 0, 2400, 21509, 0, 2400, 21509, 0, 2404, 21509, 0, 2404, 21509, 0, 2404, 21509, 0, 2404, 21509, 0, 2404, 21509, 0, 2408, 21509, 0, 2408, 21509, 0, 2408, 21509, 0, 2408, 21509, 0, 2408, 21509, 0, 5264, 1280, 0, 5264, 1280, 0, 5280, 1280, 0, 5280, 1280, 0, 6224, 1024, 0, 6240, 1024, 0, 7568, 4176, 0, 7568, 4176, 0, 7568, 4176, 0, 7584, 4176, 0, 7584, 
4176, 0, 7584, 4176, 0, 18304, 1, 0, 18880, 16, 0, 19200, 256, 0, 23232, 17476, 0, 23232, 17476, 0, 23232, 17476, 0, 23232, 17476, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574208528169077_55_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574208528169077_55_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6a59eb6c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574208528169077_55_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,255 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + if 
(((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 3) || 
(WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((159 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((231 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((261 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((270 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((277 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 636 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [13824, 1316, 0, 13824, 1316, 0, 13824, 1316, 0, 13824, 1316, 0, 14784, 16384, 0, 14800, 16384, 0, 16708, 21844, 0, 16708, 21844, 0, 16708, 21844, 0, 16708, 21844, 0, 16708, 21844, 0, 16708, 21844, 0, 16708, 21844, 0, 16712, 21844, 0, 16712, 21844, 0, 16712, 21844, 0, 16712, 21844, 0, 16712, 21844, 0, 16712, 21844, 0, 16712, 21844, 0, 16716, 21844, 0, 16716, 21844, 0, 16716, 21844, 0, 16716, 21844, 0, 16716, 21844, 0, 16716, 21844, 0, 16716, 21844, 0, 16724, 21844, 0, 16724, 21844, 0, 16724, 21844, 0, 16724, 21844, 0, 16724, 21844, 0, 16724, 21844, 0, 16724, 21844, 0, 16728, 21844, 0, 16728, 21844, 0, 16728, 21844, 0, 16728, 21844, 0, 16728, 21844, 0, 16728, 21844, 0, 16728, 21844, 0, 16732, 21844, 0, 16732, 21844, 0, 16732, 21844, 0, 16732, 21844, 0, 16732, 21844, 0, 16732, 21844, 0, 16732, 21844, 0, 17284, 21844, 0, 17284, 21844, 0, 17284, 21844, 0, 17284, 21844, 0, 17284, 21844, 0, 17284, 21844, 0, 17284, 21844, 0, 17288, 21844, 0, 17288, 21844, 0, 17288, 21844, 0, 17288, 21844, 0, 17288, 21844, 0, 17288, 21844, 0, 17288, 21844, 0, 17292, 21844, 0, 17292, 21844, 0, 17292, 21844, 0, 17292, 21844, 0, 17292, 21844, 0, 17292, 21844, 0, 17292, 21844, 0, 17300, 21844, 0, 17300, 21844, 0, 17300, 21844, 0, 
17300, 21844, 0, 17300, 21844, 0, 17300, 21844, 0, 17300, 21844, 0, 17304, 21844, 0, 17304, 21844, 0, 17304, 21844, 0, 17304, 21844, 0, 17304, 21844, 0, 17304, 21844, 0, 17304, 21844, 0, 17308, 21844, 0, 17308, 21844, 0, 17308, 21844, 0, 17308, 21844, 0, 17308, 21844, 0, 17308, 21844, 0, 17308, 21844, 0, 17728, 2, 0, 17744, 2, 0, 3008, 1, 0, 3024, 1, 0, 3712, 1, 0, 3728, 1, 0, 4672, 1088, 0, 4672, 1088, 0, 4688, 1088, 0, 4688, 1088, 0, 6080, 1, 0, 9168, 1, 0, 1728, 320, 0, 1728, 320, 0, 1744, 320, 0, 1744, 320, 0, 13824, 1316, 0, 13824, 1316, 0, 13824, 1316, 0, 13824, 1316, 0, 14784, 16384, 0, 14800, 16384, 0, 16708, 21844, 0, 16708, 21844, 0, 16708, 21844, 0, 16708, 21844, 0, 16708, 21844, 0, 16708, 21844, 0, 16708, 21844, 0, 16712, 21844, 0, 16712, 21844, 0, 16712, 21844, 0, 16712, 21844, 0, 16712, 21844, 0, 16712, 21844, 0, 16712, 21844, 0, 16716, 21844, 0, 16716, 21844, 0, 16716, 21844, 0, 16716, 21844, 0, 16716, 21844, 0, 16716, 21844, 0, 16716, 21844, 0, 16724, 21844, 0, 16724, 21844, 0, 16724, 21844, 0, 16724, 21844, 0, 16724, 21844, 0, 16724, 21844, 0, 16724, 21844, 0, 16728, 21844, 0, 16728, 21844, 0, 16728, 21844, 0, 16728, 21844, 0, 16728, 21844, 0, 16728, 21844, 0, 16728, 21844, 0, 16732, 21844, 0, 16732, 21844, 0, 16732, 21844, 0, 16732, 21844, 0, 16732, 21844, 0, 16732, 21844, 0, 16732, 21844, 0, 17284, 21844, 0, 17284, 21844, 0, 17284, 21844, 0, 17284, 21844, 0, 17284, 21844, 0, 17284, 21844, 0, 17284, 21844, 0, 17288, 21844, 0, 17288, 21844, 0, 17288, 21844, 0, 17288, 21844, 0, 17288, 21844, 0, 17288, 21844, 0, 17288, 21844, 0, 17292, 21844, 0, 17292, 21844, 0, 17292, 21844, 0, 17292, 21844, 0, 17292, 21844, 0, 17292, 21844, 0, 17292, 21844, 0, 17300, 21844, 0, 17300, 21844, 0, 17300, 21844, 0, 17300, 21844, 0, 17300, 21844, 0, 17300, 21844, 0, 17300, 21844, 0, 17304, 21844, 0, 17304, 21844, 0, 17304, 21844, 0, 17304, 21844, 0, 17304, 21844, 0, 17304, 21844, 0, 17304, 21844, 0, 17308, 21844, 0, 17308, 21844, 0, 17308, 21844, 0, 17308, 21844, 0, 
17308, 21844, 0, 17308, 21844, 0, 17308, 21844, 0, 17728, 2, 0, 17744, 2, 0, 3008, 1, 0, 3024, 1, 0, 3712, 1, 0, 3728, 1, 0, 4672, 1088, 0, 4672, 1088, 0, 4688, 1088, 0, 4688, 1088, 0, 6080, 1, 0, 9168, 1, 0, 1728, 320, 0, 1728, 320, 0, 1744, 320, 0, 1744, 320, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574227897394344_57_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574227897394344_57_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1a091f45 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574227897394344_57_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,78 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574240619112606_59_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574240619112606_59_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c550161e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574240619112606_59_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,193 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() >= 8)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 168 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 3904, 16384, 0, 4352, 85, 0, 4352, 85, 0, 4352, 85, 0, 4352, 85, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 3904, 16384, 0, 4352, 85, 0, 4352, 85, 0, 4352, 85, 0, 4352, 85, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574243589507452_61_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574243589507452_61_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f199d135 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574243589507452_61_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,108 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3520, 20, 0, 3520, 20, 0, 3524, 20, 0, 3524, 20, 0, 3536, 20, 0, 3536, 20, 0, 3540, 20, 0, 3540, 20, 0, 3520, 20, 0, 3520, 20, 0, 3524, 20, 0, 3524, 20, 0, 3536, 20, 0, 3536, 20, 0, 3540, 20, 0, 3540, 20, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574243827118967_62_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574243827118967_62_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..07eb5281 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574243827118967_62_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,197 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 7)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = ((139 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((156 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((165 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - 
Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7568, 21845, 0, 7568, 21845, 0, 7568, 21845, 0, 7568, 21845, 0, 7568, 21845, 0, 7568, 21845, 0, 7568, 21845, 0, 7568, 21845, 0, 8912, 1032, 0, 8912, 1032, 0, 8928, 1032, 0, 8928, 1032, 0, 8944, 1032, 0, 8944, 1032, 0, 10000, 2, 0, 10004, 2, 0, 10008, 2, 0, 10016, 2, 0, 10020, 2, 0, 10024, 2, 0, 10032, 2, 0, 10036, 2, 0, 10040, 2, 0, 10576, 8, 0, 10580, 8, 0, 10584, 8, 0, 10592, 8, 0, 10596, 8, 0, 10600, 8, 0, 10608, 8, 0, 10612, 8, 0, 10616, 8, 0, 11536, 2114, 0, 11536, 2114, 0, 11536, 2114, 0, 11552, 2114, 0, 11552, 2114, 0, 11552, 2114, 0, 11568, 2114, 0, 11568, 2114, 0, 11568, 2114, 0, 13952, 72, 0, 13952, 72, 0, 7568, 21845, 0, 7568, 21845, 0, 7568, 21845, 0, 7568, 21845, 0, 7568, 21845, 0, 7568, 21845, 0, 7568, 21845, 0, 7568, 21845, 0, 8912, 1032, 0, 8912, 1032, 0, 8928, 1032, 0, 8928, 1032, 0, 8944, 1032, 0, 8944, 1032, 0, 10000, 2, 0, 10004, 2, 0, 10008, 2, 0, 10016, 2, 0, 10020, 2, 0, 10024, 2, 0, 10032, 2, 0, 10036, 2, 0, 10040, 2, 0, 10576, 8, 0, 10580, 8, 0, 10584, 8, 0, 10592, 8, 0, 10596, 8, 0, 10600, 8, 0, 10608, 8, 0, 10612, 8, 0, 10616, 8, 0, 11536, 2114, 0, 11536, 2114, 0, 11536, 2114, 0, 11552, 2114, 0, 11552, 2114, 0, 11552, 2114, 0, 11568, 2114, 0, 11568, 2114, 0, 11568, 2114, 0, 13952, 72, 0, 13952, 72, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 
+... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574261738699261_64_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574261738699261_64_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..eb77d28c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574261738699261_64_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,214 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(((61 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() 
>= 13)) { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 342 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 4097, 0, 1344, 4097, 0, 1360, 4097, 0, 1360, 4097, 0, 3908, 8738, 0, 3908, 8738, 0, 3908, 8738, 0, 3908, 8738, 0, 3912, 8738, 0, 3912, 8738, 0, 3912, 8738, 0, 3912, 8738, 0, 3924, 8738, 0, 3924, 8738, 0, 3924, 8738, 0, 3924, 8738, 0, 3928, 8738, 0, 3928, 8738, 0, 3928, 8738, 0, 3928, 8738, 0, 3940, 8738, 0, 3940, 8738, 0, 3940, 8738, 0, 3940, 8738, 0, 3944, 8738, 0, 3944, 8738, 0, 3944, 8738, 0, 3944, 8738, 0, 6400, 17472, 0, 6400, 17472, 0, 6400, 17472, 0, 6416, 17472, 0, 6416, 17472, 0, 6416, 17472, 0, 7360, 4, 0, 7376, 4, 0, 9040, 2176, 0, 9040, 2176, 0, 9056, 2176, 0, 9056, 2176, 0, 9808, 49152, 0, 9808, 49152, 0, 9824, 49152, 0, 9824, 49152, 0, 10512, 4, 0, 10528, 4, 0, 13312, 32768, 0, 12928, 21845, 0, 12928, 21845, 0, 12928, 21845, 0, 12928, 21845, 0, 12928, 21845, 0, 12928, 21845, 0, 12928, 21845, 0, 12928, 21845, 0, 12672, 128, 0, 12288, 8, 0, 1344, 4097, 0, 1344, 4097, 0, 1360, 4097, 0, 1360, 4097, 0, 3908, 8738, 0, 3908, 8738, 0, 3908, 8738, 0, 3908, 8738, 0, 3912, 8738, 0, 3912, 8738, 0, 3912, 8738, 0, 3912, 8738, 0, 3924, 8738, 0, 3924, 8738, 0, 3924, 8738, 0, 3924, 8738, 0, 3928, 8738, 0, 3928, 8738, 0, 3928, 8738, 0, 3928, 8738, 0, 3940, 8738, 0, 3940, 8738, 0, 3940, 8738, 0, 3940, 8738, 0, 3944, 8738, 0, 3944, 8738, 0, 3944, 8738, 0, 3944, 8738, 0, 6400, 17472, 0, 6400, 17472, 0, 6400, 17472, 0, 6416, 17472, 0, 6416, 17472, 0, 6416, 17472, 0, 7360, 4, 0, 7376, 4, 0, 9040, 2176, 0, 9040, 2176, 0, 9056, 2176, 0, 9056, 2176, 0, 9808, 49152, 0, 9808, 49152, 0, 9824, 49152, 0, 9824, 49152, 0, 10512, 4, 0, 10528, 4, 0, 
13312, 32768, 0, 12928, 21845, 0, 12928, 21845, 0, 12928, 21845, 0, 12928, 21845, 0, 12928, 21845, 0, 12928, 21845, 0, 12928, 21845, 0, 12928, 21845, 0, 12672, 128, 0, 12288, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574275416482937_65_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574275416482937_65_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5cc317a9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574275416482937_65_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,329 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result 
+ WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 10)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || 
(WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((298 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 8)) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((318 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((325 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((343 << 6) 
| (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((354 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((counter3 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (364 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 5, 0, 1280, 5, 0, 3008, 16, 0, 7488, 1024, 0, 8192, 64, 0, 10368, 8, 0, 11456, 32768, 0, 11472, 32768, 0, 13840, 2, 0, 13856, 2, 0, 13872, 2, 0, 14672, 2, 0, 14688, 2, 0, 14704, 2, 0, 15248, 8320, 0, 15248, 8320, 0, 15264, 8320, 0, 15264, 8320, 0, 15280, 8320, 0, 15280, 8320, 0, 1280, 5, 0, 1280, 5, 0, 3008, 16, 0, 7488, 1024, 0, 8192, 64, 0, 10368, 8, 0, 11456, 32768, 0, 11472, 32768, 0, 13840, 2, 0, 13856, 2, 0, 13872, 2, 0, 14672, 2, 0, 14688, 2, 0, 14704, 2, 0, 15248, 8320, 0, 15248, 8320, 0, 15264, 8320, 0, 15264, 8320, 0, 15280, 8320, 0, 15280, 8320, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - 
Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574277072852621_66_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574277072852621_66_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c509c09b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574277072852621_66_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,170 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 1))) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((78 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 7))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = 
(result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 4096, 0, 1872, 4096, 0, 1888, 4096, 0, 4032, 4096, 0, 4036, 4096, 0, 4040, 4096, 0, 4048, 4096, 0, 4052, 4096, 0, 4056, 4096, 0, 4064, 4096, 0, 4068, 4096, 0, 4072, 4096, 0, 6912, 32769, 0, 6912, 32769, 0, 6928, 32769, 0, 6928, 32769, 0, 6944, 32769, 0, 6944, 32769, 0, 10048, 32769, 0, 10048, 32769, 0, 
10064, 32769, 0, 10064, 32769, 0, 10080, 32769, 0, 10080, 32769, 0, 11264, 4608, 0, 11264, 4608, 0, 11280, 4608, 0, 11280, 4608, 0, 11296, 4608, 0, 11296, 4608, 0, 12032, 1040, 0, 12032, 1040, 0, 12352, 18724, 0, 12352, 18724, 0, 12352, 18724, 0, 12352, 18724, 0, 12352, 18724, 0, 1856, 4096, 0, 1872, 4096, 0, 1888, 4096, 0, 4032, 4096, 0, 4036, 4096, 0, 4040, 4096, 0, 4048, 4096, 0, 4052, 4096, 0, 4056, 4096, 0, 4064, 4096, 0, 4068, 4096, 0, 4072, 4096, 0, 6912, 32769, 0, 6912, 32769, 0, 6928, 32769, 0, 6928, 32769, 0, 6944, 32769, 0, 6944, 32769, 0, 10048, 32769, 0, 10048, 32769, 0, 10064, 32769, 0, 10064, 32769, 0, 10080, 32769, 0, 10080, 32769, 0, 11264, 4608, 0, 11264, 4608, 0, 11280, 4608, 0, 11280, 4608, 0, 11296, 4608, 0, 11296, 4608, 0, 12032, 1040, 0, 12032, 1040, 0, 12352, 18724, 0, 12352, 18724, 0, 12352, 18724, 0, 12352, 18724, 0, 12352, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574279469909977_67_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574279469909977_67_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1ad2ed22 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574279469909977_67_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,315 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((13 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((28 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { 
+ case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 13))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((235 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((267 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((278 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (292 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 360 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [848, 2, 0, 864, 2, 0, 880, 2, 0, 1808, 32768, 0, 1812, 32768, 0, 1816, 32768, 0, 1824, 32768, 0, 1828, 32768, 0, 1832, 32768, 0, 1840, 32768, 0, 1844, 32768, 0, 1848, 32768, 0, 2384, 64, 0, 2388, 64, 0, 2392, 64, 0, 2400, 64, 0, 2404, 64, 0, 2408, 64, 0, 2416, 64, 0, 2420, 64, 0, 2424, 64, 0, 3200, 17, 0, 3200, 17, 0, 3776, 4369, 0, 3776, 4369, 0, 3776, 4369, 0, 3776, 4369, 0, 10752, 16388, 0, 10752, 16388, 0, 11200, 34952, 0, 11200, 34952, 0, 11200, 34952, 0, 11200, 34952, 0, 12608, 32777, 0, 12608, 32777, 0, 12608, 32777, 0, 12624, 32777, 0, 12624, 32777, 0, 12624, 32777, 0, 17088, 36873, 0, 17088, 36873, 0, 17088, 36873, 0, 17088, 36873, 0, 17104, 36873, 0, 17104, 36873, 0, 17104, 36873, 0, 17104, 36873, 0, 17792, 
36865, 0, 17792, 36865, 0, 17792, 36865, 0, 17808, 36865, 0, 17808, 36865, 0, 17808, 36865, 0, 18368, 1040, 0, 18368, 1040, 0, 18688, 18724, 0, 18688, 18724, 0, 18688, 18724, 0, 18688, 18724, 0, 18688, 18724, 0, 848, 2, 0, 864, 2, 0, 880, 2, 0, 1808, 32768, 0, 1812, 32768, 0, 1816, 32768, 0, 1824, 32768, 0, 1828, 32768, 0, 1832, 32768, 0, 1840, 32768, 0, 1844, 32768, 0, 1848, 32768, 0, 2384, 64, 0, 2388, 64, 0, 2392, 64, 0, 2400, 64, 0, 2404, 64, 0, 2408, 64, 0, 2416, 64, 0, 2420, 64, 0, 2424, 64, 0, 3200, 17, 0, 3200, 17, 0, 3776, 4369, 0, 3776, 4369, 0, 3776, 4369, 0, 3776, 4369, 0, 10752, 16388, 0, 10752, 16388, 0, 11200, 34952, 0, 11200, 34952, 0, 11200, 34952, 0, 11200, 34952, 0, 12608, 32777, 0, 12608, 32777, 0, 12608, 32777, 0, 12624, 32777, 0, 12624, 32777, 0, 12624, 32777, 0, 17088, 36873, 0, 17088, 36873, 0, 17088, 36873, 0, 17088, 36873, 0, 17104, 36873, 0, 17104, 36873, 0, 17104, 36873, 0, 17104, 36873, 0, 17792, 36865, 0, 17792, 36865, 0, 17792, 36865, 0, 17808, 36865, 0, 17808, 36865, 0, 17808, 36865, 0, 18368, 1040, 0, 18368, 1040, 0, 18688, 18724, 0, 18688, 18724, 0, 18688, 18724, 0, 18688, 18724, 0, 18688, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574288879876328_68_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574288879876328_68_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cac45702 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574288879876328_68_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,331 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() < 4)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + 
result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((221 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((247 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((262 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 414 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4352, 9, 0, 4352, 9, 0, 4368, 9, 0, 4368, 9, 0, 4384, 9, 0, 4384, 9, 0, 5696, 33288, 0, 5696, 33288, 0, 5696, 33288, 0, 5712, 33288, 0, 5712, 33288, 0, 5712, 33288, 0, 5728, 33288, 0, 5728, 33288, 0, 5728, 33288, 0, 6272, 4160, 0, 6272, 4160, 0, 6288, 4160, 0, 6288, 4160, 0, 6304, 4160, 0, 6304, 4160, 0, 6720, 9, 0, 6720, 9, 0, 6736, 9, 0, 6736, 9, 0, 6752, 9, 0, 6752, 9, 0, 7296, 1040, 0, 
7296, 1040, 0, 10624, 18724, 0, 10624, 18724, 0, 10624, 18724, 0, 10624, 18724, 0, 10624, 18724, 0, 11520, 73, 0, 11520, 73, 0, 11520, 73, 0, 13248, 8208, 0, 13248, 8208, 0, 13264, 8208, 0, 13264, 8208, 0, 13280, 8208, 0, 13280, 8208, 0, 14148, 9216, 0, 14148, 9216, 0, 14152, 9216, 0, 14152, 9216, 0, 14156, 9216, 0, 14156, 9216, 0, 14164, 9216, 0, 14164, 9216, 0, 14168, 9216, 0, 14168, 9216, 0, 14172, 9216, 0, 14172, 9216, 0, 14180, 9216, 0, 14180, 9216, 0, 14184, 9216, 0, 14184, 9216, 0, 14188, 9216, 0, 14188, 9216, 0, 16768, 1024, 0, 16784, 1024, 0, 16800, 1024, 0, 17088, 18724, 0, 17088, 18724, 0, 17088, 18724, 0, 17088, 18724, 0, 17088, 18724, 0, 4352, 9, 0, 4352, 9, 0, 4368, 9, 0, 4368, 9, 0, 4384, 9, 0, 4384, 9, 0, 5696, 33288, 0, 5696, 33288, 0, 5696, 33288, 0, 5712, 33288, 0, 5712, 33288, 0, 5712, 33288, 0, 5728, 33288, 0, 5728, 33288, 0, 5728, 33288, 0, 6272, 4160, 0, 6272, 4160, 0, 6288, 4160, 0, 6288, 4160, 0, 6304, 4160, 0, 6304, 4160, 0, 6720, 9, 0, 6720, 9, 0, 6736, 9, 0, 6736, 9, 0, 6752, 9, 0, 6752, 9, 0, 7296, 1040, 0, 7296, 1040, 0, 10624, 18724, 0, 10624, 18724, 0, 10624, 18724, 0, 10624, 18724, 0, 10624, 18724, 0, 11520, 73, 0, 11520, 73, 0, 11520, 73, 0, 13248, 8208, 0, 13248, 8208, 0, 13264, 8208, 0, 13264, 8208, 0, 13280, 8208, 0, 13280, 8208, 0, 14148, 9216, 0, 14148, 9216, 0, 14152, 9216, 0, 14152, 9216, 0, 14156, 9216, 0, 14156, 9216, 0, 14164, 9216, 0, 14164, 9216, 0, 14168, 9216, 0, 14168, 9216, 0, 14172, 9216, 0, 14172, 9216, 0, 14180, 9216, 0, 14180, 9216, 0, 14184, 9216, 0, 14184, 9216, 0, 14188, 9216, 0, 14188, 9216, 0, 16768, 1024, 0, 16784, 1024, 0, 16800, 1024, 0, 17088, 18724, 0, 17088, 18724, 0, 17088, 18724, 0, 17088, 18724, 0, 17088, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574309321612609_69_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574309321612609_69_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..54a52f71 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574309321612609_69_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,264 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 
12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((196 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((207 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } 
+} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 3328, 1, 0, 5328, 4368, 0, 5328, 4368, 0, 5328, 4368, 0, 5344, 4368, 0, 5344, 4368, 0, 5344, 4368, 0, 5360, 4368, 0, 5360, 4368, 0, 5360, 4368, 0, 5904, 4368, 0, 5904, 4368, 0, 5904, 4368, 0, 5920, 4368, 0, 5920, 4368, 0, 5920, 4368, 0, 5936, 4368, 0, 5936, 4368, 0, 5936, 4368, 0, 6848, 256, 0, 7744, 17476, 0, 7744, 17476, 0, 7744, 17476, 0, 7744, 17476, 0, 8192, 34952, 0, 8192, 34952, 0, 8192, 34952, 0, 8192, 34952, 0, 9808, 16385, 0, 9808, 16385, 0, 9824, 16385, 0, 9824, 16385, 0, 14928, 1, 0, 14944, 1, 0, 1280, 1, 0, 3328, 1, 0, 5328, 4368, 0, 5328, 4368, 0, 5328, 4368, 0, 5344, 4368, 0, 5344, 4368, 0, 5344, 4368, 0, 5360, 4368, 0, 5360, 4368, 0, 5360, 4368, 0, 5904, 4368, 0, 5904, 4368, 0, 5904, 4368, 0, 5920, 4368, 0, 5920, 4368, 0, 5920, 4368, 0, 5936, 4368, 0, 5936, 4368, 0, 5936, 4368, 0, 6848, 256, 0, 7744, 17476, 0, 7744, 17476, 0, 7744, 17476, 0, 7744, 17476, 0, 8192, 34952, 0, 8192, 34952, 0, 8192, 34952, 0, 8192, 34952, 0, 9808, 16385, 0, 9808, 16385, 0, 9824, 16385, 0, 9824, 16385, 0, 14928, 1, 0, 14944, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574310652154070_70_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574310652154070_70_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7c5c5c0a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574310652154070_70_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,288 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: 3 uints per record (id word, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 holds the next free slot in _participant_bit */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Generated wave-participation test. At each guarded site the active lanes run a wave op, then each of them appends one 3-uint record: InterlockedAdd on _wave_op_index[0] reserves the slots (temp = index before the add), slot 0 gets the id word (site id << 6, OR'ed with enclosing loop counters in the lower bits), slots 1-2 get ballot.x and ballot.y of WaveActiveBallot(1), i.e. the mask of lanes active at that site. 'result' only feeds later wave ops and is never stored. NOTE(review): 32 threads with 16-bit expected masks listed twice suggests two 16-lane waves - confirm. */ + uint result = 0; + if ((WaveGetLaneIndex() == 6)) { /* only lane 6 enters; none of the nested lane tests in this region can hold for lane 6, so it writes no records */ + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((147 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((156 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((243 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((275 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((293 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((304 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((321 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((330 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 168 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [10880, 64, 0, 10896, 64, 0, 13184, 2, 0, 13888, 2, 0, 15552, 1040, 0, 15552, 1040, 0, 15568, 1040, 0, 15568, 1040, 0, 15584, 1040, 0, 15584, 1040, 0, 17600, 2080, 0, 17600, 2080, 0, 17616, 2080, 0, 17616, 2080, 0, 17632, 2080, 0, 17632, 2080, 0, 18752, 4, 0, 18768, 4, 0, 18784, 4, 0, 20544, 256, 0, 20560, 256, 0, 20576, 256, 0, 21120, 2080, 0, 21120, 2080, 0, 21136, 2080, 0, 21136, 2080, 0, 21152, 2080, 0, 21152, 2080, 0, 10880, 64, 0, 10896, 64, 0, 13184, 2, 0, 13888, 2, 0, 15552, 1040, 0, 15552, 1040, 0, 15568, 1040, 0, 15568, 1040, 0, 15584, 1040, 0, 15584, 1040, 0, 17600, 2080, 0, 
17600, 2080, 0, 17616, 2080, 0, 17616, 2080, 0, 17632, 2080, 0, 17632, 2080, 0, 18752, 4, 0, 18768, 4, 0, 18784, 4, 0, 20544, 256, 0, 20560, 256, 0, 20576, 256, 0, 21120, 2080, 0, 21120, 2080, 0, 21136, 2080, 0, 21136, 2080, 0, 21152, 2080, 0, 21152, 2080, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574326360570704_72_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574326360570704_72_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4ef0583b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574326360570704_72_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,213 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: 3 uints per record (id word, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 holds the next free slot in _participant_bit */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Generated wave-participation test. At each guarded site the active lanes run a wave op, then each of them appends one 3-uint record: InterlockedAdd on _wave_op_index[0] reserves the slots (temp = index before the add), slot 0 gets the id word (site id << 6, OR'ed with enclosing loop counters in the lower bits), slots 1-2 get ballot.x and ballot.y of WaveActiveBallot(1), i.e. the mask of lanes active at that site. 'result' only feeds later wave ops and is never stored. NOTE(review): 32 threads with 16-bit expected masks listed twice suggests two 16-lane waves - confirm. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if 
(((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } /* no break: case 1 lanes fall through into case 2; the expected mask 26214 (0x6666) for id 55 << 6 counts them */ + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } /* no break: falls through into case 3; the expected mask 61166 (0xEEEE) for id 62 << 6 counts lanes from cases 1, 2 and 3 */ + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 318 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1872, 8194, 0, 1872, 8194, 0, 1888, 8194, 0, 1888, 8194, 0, 2772, 2, 0, 2776, 2, 0, 2780, 2, 0, 2788, 2, 0, 2792, 2, 0, 2796, 2, 0, 3220, 8192, 0, 3224, 8192, 0, 3228, 8192, 0, 3236, 8192, 0, 3240, 8192, 0, 3244, 8192, 0, 3520, 26214, 0, 3520, 26214, 0, 3520, 26214, 0, 3520, 26214, 0, 3520, 26214, 0, 3520, 26214, 0, 3520, 26214, 0, 3520, 26214, 0, 3968, 61166, 0, 3968, 61166, 0, 3968, 61166, 0, 3968, 61166, 0, 3968, 61166, 0, 3968, 61166, 0, 3968, 61166, 0, 3968, 61166, 0, 3968, 61166, 0, 3968, 61166, 0, 3968, 61166, 0, 3968, 61166, 0, 5120, 61441, 0, 5120, 61441, 0, 5120, 61441, 0, 5120, 61441, 0, 5120, 61441, 0, 8832, 73, 0, 8832, 73, 0, 8832, 73, 0, 9408, 1040, 0, 9408, 1040, 0, 9728, 18724, 0, 9728, 18724, 0, 9728, 18724, 0, 9728, 18724, 0, 9728, 18724, 0, 576, 17, 0, 576, 17, 0, 1872, 8194, 0, 1872, 8194, 0, 1888, 8194, 0, 1888, 8194, 0, 2772, 2, 0, 2776, 2, 0, 2780, 2, 0, 2788, 2, 0, 2792, 2, 0, 2796, 2, 0, 3220, 8192, 0, 3224, 8192, 0, 3228, 8192, 0, 3236, 8192, 0, 3240, 8192, 0, 3244, 8192, 0, 3520, 26214, 0, 3520, 26214, 0, 3520, 26214, 0, 3520, 26214, 0, 3520, 26214, 0, 3520, 26214, 0, 3520, 26214, 0, 3520, 26214, 0, 3968, 61166, 0, 3968, 61166, 0, 3968, 61166, 0, 3968, 61166, 0, 3968, 61166, 0, 3968, 61166, 0, 3968, 
61166, 0, 3968, 61166, 0, 3968, 61166, 0, 3968, 61166, 0, 3968, 61166, 0, 3968, 61166, 0, 5120, 61441, 0, 5120, 61441, 0, 5120, 61441, 0, 5120, 61441, 0, 5120, 61441, 0, 8832, 73, 0, 8832, 73, 0, 8832, 73, 0, 9408, 1040, 0, 9408, 1040, 0, 9728, 18724, 0, 9728, 18724, 0, 9728, 18724, 0, 9728, 18724, 0, 9728, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574376312755600_74_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574376312755600_74_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0df9ff74 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574376312755600_74_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,214 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: 3 uints per record (id word, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 holds the next free slot in _participant_bit */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Generated wave-participation test. At each guarded site the active lanes run a wave op, then each of them appends one 3-uint record: InterlockedAdd on _wave_op_index[0] reserves the slots (temp = index before the add), slot 0 gets the id word (site id << 6), slots 1-2 get ballot.x and ballot.y of WaveActiveBallot(1), i.e. the mask of lanes active at that site. 'result' only feeds later wave ops and is never stored. NOTE(review): 32 threads with 16-bit expected masks listed twice suggests two 16-lane waves - confirm. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 9))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 0))) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { /* unreachable: lane % 3 is always 0, 1 or 2 */ + result = (result 
+ WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + 
result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3456, 1, 0, 4928, 1040, 0, 4928, 1040, 0, 5248, 18724, 0, 5248, 18724, 0, 5248, 18724, 0, 5248, 18724, 0, 5248, 18724, 0, 8640, 1040, 0, 8640, 1040, 0, 8960, 16644, 0, 8960, 16644, 0, 8960, 16644, 0, 3456, 1, 0, 4928, 1040, 0, 4928, 1040, 0, 5248, 18724, 0, 5248, 18724, 0, 5248, 18724, 0, 5248, 18724, 0, 5248, 18724, 0, 8640, 1040, 0, 8640, 1040, 0, 8960, 16644, 0, 8960, 16644, 0, 8960, 16644, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574379568260493_76_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574379568260493_76_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..92211933 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574379568260493_76_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,97 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: 3 uints per record (id word, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 holds the next free slot in _participant_bit */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Generated wave-participation test. At each guarded site the active lanes run a wave op, then each of them appends one 3-uint record: InterlockedAdd on _wave_op_index[0] reserves the slots (temp = index before the add), slot 0 gets the id word (site id << 6), slots 1-2 get ballot.x and ballot.y of WaveActiveBallot(1), i.e. the mask of lanes active at that site. 'result' only feeds later wave ops and is never stored. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574379672146314_77_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574379672146314_77_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b7980658 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574379672146314_77_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,339 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = 
(result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { 
+ if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((120 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15))) { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((299 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (308 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (313 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if 
((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 282 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 2080, 0, 1600, 2080, 0, 1344, 260, 0, 1344, 260, 0, 2880, 33288, 0, 2880, 33288, 0, 2880, 33288, 0, 2896, 33288, 0, 2896, 33288, 0, 2896, 33288, 0, 2912, 33288, 0, 2912, 33288, 0, 2912, 33288, 0, 4032, 32769, 0, 4032, 32769, 0, 4048, 32769, 0, 4048, 32769, 0, 4064, 32769, 0, 4064, 32769, 0, 4736, 4161, 0, 4736, 4161, 0, 4736, 4161, 0, 4752, 4161, 0, 4752, 4161, 0, 4752, 4161, 0, 4768, 4161, 0, 4768, 4161, 0, 4768, 4161, 0, 5504, 1040, 0, 5504, 1040, 0, 12864, 18724, 0, 12864, 18724, 0, 12864, 18724, 0, 12864, 18724, 0, 12864, 18724, 0, 17600, 1, 0, 19152, 16, 0, 19168, 16, 0, 19184, 16, 0, 20032, 17476, 0, 20032, 17476, 0, 20032, 17476, 0, 20032, 17476, 0, 20480, 34952, 0, 20480, 34952, 0, 20480, 34952, 0, 20480, 34952, 0, 1600, 2080, 0, 1600, 2080, 0, 1344, 260, 0, 1344, 260, 0, 2880, 33288, 0, 2880, 33288, 0, 2880, 33288, 0, 2896, 33288, 0, 2896, 33288, 0, 2896, 33288, 0, 2912, 33288, 0, 2912, 33288, 0, 2912, 33288, 0, 4032, 32769, 0, 4032, 32769, 0, 4048, 32769, 0, 4048, 32769, 0, 4064, 32769, 0, 4064, 32769, 0, 4736, 4161, 0, 4736, 4161, 0, 4736, 4161, 0, 4752, 4161, 0, 4752, 4161, 0, 4752, 4161, 0, 4768, 4161, 0, 4768, 4161, 0, 4768, 4161, 0, 5504, 1040, 0, 5504, 1040, 0, 12864, 18724, 0, 12864, 18724, 0, 12864, 18724, 0, 12864, 18724, 0, 12864, 18724, 0, 17600, 1, 0, 19152, 16, 0, 19168, 16, 0, 19184, 16, 0, 20032, 17476, 0, 20032, 17476, 0, 20032, 17476, 0, 
20032, 17476, 0, 20480, 34952, 0, 20480, 34952, 0, 20480, 34952, 0, 20480, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574381977323257_78_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574381977323257_78_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..775e80c4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574381977323257_78_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,180 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2368, 36, 0, 2368, 36, 0, 3920, 8192, 0, 3936, 8192, 0, 3952, 8192, 0, 4224, 4, 0, 8896, 1024, 0, 8640, 43690, 0, 8640, 43690, 0, 8640, 43690, 0, 8640, 43690, 0, 8640, 43690, 0, 8640, 43690, 0, 8640, 43690, 0, 8640, 43690, 0, 8256, 256, 
0, 7872, 64, 0, 2368, 36, 0, 2368, 36, 0, 3920, 8192, 0, 3936, 8192, 0, 3952, 8192, 0, 4224, 4, 0, 8896, 1024, 0, 8640, 43690, 0, 8640, 43690, 0, 8640, 43690, 0, 8640, 43690, 0, 8640, 43690, 0, 8640, 43690, 0, 8640, 43690, 0, 8640, 43690, 0, 8256, 256, 0, 7872, 64, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574382415579551_79_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574382415579551_79_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..09641055 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574382415579551_79_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,215 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 14)) { + result = 
(result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((99 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((112 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = 
(i3 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((196 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((211 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((218 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 690 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2624, 57359, 0, 2624, 57359, 0, 2624, 57359, 0, 2624, 57359, 0, 2624, 57359, 0, 2624, 57359, 0, 2624, 57359, 0, 3392, 15, 0, 3392, 15, 0, 3392, 15, 0, 3392, 15, 0, 5120, 2, 0, 5136, 2, 0, 5152, 2, 0, 6336, 15, 0, 6336, 15, 0, 6336, 15, 0, 6336, 15, 0, 6340, 15, 0, 6340, 15, 0, 6340, 15, 0, 6340, 15, 0, 6344, 15, 0, 6344, 15, 0, 6344, 15, 0, 6344, 15, 0, 6352, 15, 0, 6352, 15, 0, 6352, 15, 0, 6352, 15, 0, 6356, 15, 0, 6356, 15, 0, 6356, 15, 0, 6356, 15, 0, 6360, 15, 0, 6360, 15, 0, 6360, 15, 0, 6360, 15, 
0, 6368, 15, 0, 6368, 15, 0, 6368, 15, 0, 6368, 15, 0, 6372, 15, 0, 6372, 15, 0, 6372, 15, 0, 6372, 15, 0, 6376, 15, 0, 6376, 15, 0, 6376, 15, 0, 6376, 15, 0, 7168, 7, 0, 7168, 7, 0, 7168, 7, 0, 7172, 7, 0, 7172, 7, 0, 7172, 7, 0, 7176, 7, 0, 7176, 7, 0, 7176, 7, 0, 7184, 7, 0, 7184, 7, 0, 7184, 7, 0, 7188, 7, 0, 7188, 7, 0, 7188, 7, 0, 7192, 7, 0, 7192, 7, 0, 7192, 7, 0, 7200, 7, 0, 7200, 7, 0, 7200, 7, 0, 7204, 7, 0, 7204, 7, 0, 7204, 7, 0, 7208, 7, 0, 7208, 7, 0, 7208, 7, 0, 7616, 3, 0, 7616, 3, 0, 9984, 4112, 0, 9984, 4112, 0, 11072, 2, 0, 11088, 2, 0, 11104, 2, 0, 12544, 32, 0, 12548, 32, 0, 12552, 32, 0, 12560, 32, 0, 12564, 32, 0, 12568, 32, 0, 12576, 32, 0, 12580, 32, 0, 12584, 32, 0, 13504, 512, 0, 13508, 512, 0, 13512, 512, 0, 13520, 512, 0, 13524, 512, 0, 13528, 512, 0, 13536, 512, 0, 13540, 512, 0, 13544, 512, 0, 13952, 42, 0, 13952, 42, 0, 13952, 42, 0, 13968, 42, 0, 13968, 42, 0, 13968, 42, 0, 13984, 42, 0, 13984, 42, 0, 13984, 42, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2624, 57359, 0, 2624, 57359, 0, 2624, 57359, 0, 2624, 57359, 0, 2624, 57359, 0, 2624, 57359, 0, 2624, 57359, 0, 3392, 15, 0, 3392, 15, 0, 3392, 15, 0, 3392, 15, 0, 5120, 2, 0, 5136, 2, 0, 5152, 2, 0, 6336, 15, 0, 6336, 15, 0, 6336, 15, 0, 6336, 15, 0, 6340, 15, 0, 6340, 15, 0, 6340, 15, 0, 6340, 15, 0, 6344, 15, 0, 6344, 15, 0, 6344, 15, 0, 6344, 15, 0, 6352, 15, 0, 6352, 15, 0, 6352, 15, 0, 6352, 15, 0, 6356, 15, 0, 6356, 15, 0, 6356, 15, 0, 6356, 15, 0, 6360, 15, 0, 6360, 15, 0, 6360, 15, 0, 6360, 15, 0, 6368, 15, 0, 6368, 15, 0, 6368, 15, 0, 6368, 15, 0, 6372, 15, 0, 6372, 15, 0, 6372, 15, 0, 6372, 15, 0, 6376, 15, 0, 6376, 15, 0, 6376, 15, 0, 6376, 15, 0, 7168, 7, 0, 7168, 7, 0, 7168, 7, 0, 7172, 7, 0, 7172, 7, 0, 7172, 7, 0, 7176, 7, 0, 7176, 7, 0, 7176, 7, 0, 7184, 7, 0, 7184, 7, 0, 7184, 7, 0, 7188, 7, 0, 7188, 7, 0, 7188, 7, 0, 7192, 7, 0, 7192, 7, 0, 7192, 7, 0, 7200, 7, 0, 7200, 7, 0, 7200, 7, 0, 7204, 7, 0, 7204, 7, 0, 7204, 7, 0, 7208, 7, 0, 7208, 7, 0, 7208, 7, 
0, 7616, 3, 0, 7616, 3, 0, 9984, 4112, 0, 9984, 4112, 0, 11072, 2, 0, 11088, 2, 0, 11104, 2, 0, 12544, 32, 0, 12548, 32, 0, 12552, 32, 0, 12560, 32, 0, 12564, 32, 0, 12568, 32, 0, 12576, 32, 0, 12580, 32, 0, 12584, 32, 0, 13504, 512, 0, 13508, 512, 0, 13512, 512, 0, 13520, 512, 0, 13524, 512, 0, 13528, 512, 0, 13536, 512, 0, 13540, 512, 0, 13544, 512, 0, 13952, 42, 0, 13952, 42, 0, 13952, 42, 0, 13968, 42, 0, 13968, 42, 0, 13968, 42, 0, 13984, 42, 0, 13984, 42, 0, 13984, 42, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574412343597610_80_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574412343597610_80_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e9a05cad --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574412343597610_80_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,332 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (counter1 << 4)) | 
(i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((110 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((127 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((134 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || 
(WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 396 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1488, 10, 0, 1488, 10, 0, 1504, 10, 0, 1504, 10, 0, 1520, 10, 0, 1520, 10, 0, 2624, 43690, 0, 2624, 43690, 0, 2624, 43690, 0, 2624, 43690, 0, 2624, 43690, 0, 2624, 43690, 0, 2624, 43690, 0, 2624, 43690, 0, 5520, 32896, 0, 5520, 32896, 0, 5524, 32896, 0, 5524, 32896, 0, 5528, 32896, 0, 5528, 32896, 0, 5536, 32896, 0, 5536, 32896, 0, 5540, 32896, 0, 5540, 32896, 0, 5544, 32896, 0, 5544, 32896, 0, 7056, 32, 0, 7060, 32, 0, 7064, 32, 0, 7072, 32, 0, 7076, 32, 0, 7080, 32, 0, 8144, 32, 0, 8148, 32, 0, 8152, 32, 0, 8160, 32, 0, 8164, 32, 0, 8168, 32, 0, 8592, 2, 0, 8596, 2, 0, 8600, 2, 0, 8608, 2, 0, 8612, 2, 0, 8616, 2, 0, 11024, 4097, 0, 11024, 4097, 0, 11040, 4097, 0, 11040, 4097, 0, 11056, 4097, 0, 11056, 4097, 0, 11728, 1, 0, 11744, 1, 0, 11760, 1, 0, 15040, 256, 0, 15936, 17476, 0, 15936, 17476, 0, 15936, 17476, 0, 15936, 17476, 0, 16384, 34952, 0, 16384, 34952, 0, 16384, 34952, 0, 16384, 34952, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1488, 10, 0, 1488, 10, 0, 1504, 10, 0, 1504, 10, 0, 1520, 10, 0, 1520, 10, 0, 2624, 43690, 0, 2624, 43690, 0, 2624, 43690, 0, 2624, 43690, 0, 2624, 43690, 0, 2624, 43690, 0, 2624, 43690, 0, 2624, 43690, 0, 5520, 32896, 0, 5520, 32896, 0, 5524, 32896, 0, 5524, 32896, 0, 5528, 
32896, 0, 5528, 32896, 0, 5536, 32896, 0, 5536, 32896, 0, 5540, 32896, 0, 5540, 32896, 0, 5544, 32896, 0, 5544, 32896, 0, 7056, 32, 0, 7060, 32, 0, 7064, 32, 0, 7072, 32, 0, 7076, 32, 0, 7080, 32, 0, 8144, 32, 0, 8148, 32, 0, 8152, 32, 0, 8160, 32, 0, 8164, 32, 0, 8168, 32, 0, 8592, 2, 0, 8596, 2, 0, 8600, 2, 0, 8608, 2, 0, 8612, 2, 0, 8616, 2, 0, 11024, 4097, 0, 11024, 4097, 0, 11040, 4097, 0, 11040, 4097, 0, 11056, 4097, 0, 11056, 4097, 0, 11728, 1, 0, 11744, 1, 0, 11760, 1, 0, 15040, 256, 0, 15936, 17476, 0, 15936, 17476, 0, 15936, 17476, 0, 15936, 17476, 0, 16384, 34952, 0, 16384, 34952, 0, 16384, 34952, 0, 16384, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574431839469054_81_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574431839469054_81_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..44ca5894 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574431839469054_81_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,367 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); 
i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 
4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((212 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((259 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((270 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((284 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (329 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (334 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (339 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 4224, 18724, 0, 4224, 18724, 0, 4224, 18724, 0, 4224, 18724, 0, 4224, 18724, 0, 8064, 4096, 0, 8080, 4096, 0, 8896, 4096, 0, 8912, 4096, 0, 14656, 17476, 0, 14656, 17476, 0, 14656, 17476, 0, 14656, 17476, 0, 16576, 32768, 0, 16580, 32768, 0, 16592, 32768, 0, 16596, 32768, 0, 17280, 32768, 0, 17284, 32768, 0, 17296, 32768, 0, 17300, 32768, 0, 21696, 2048, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 4224, 18724, 0, 4224, 18724, 0, 4224, 18724, 0, 4224, 18724, 0, 4224, 18724, 0, 8064, 4096, 0, 8080, 4096, 0, 8896, 4096, 0, 8912, 4096, 0, 14656, 17476, 0, 14656, 17476, 0, 14656, 17476, 0, 14656, 17476, 0, 16576, 32768, 0, 16580, 32768, 0, 16592, 32768, 0, 16596, 32768, 0, 17280, 32768, 0, 17284, 32768, 0, 17296, 32768, 0, 17300, 32768, 0, 21696, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574435536151424_82_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574435536151424_82_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2bfb4e49 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574435536151424_82_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,119 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((78 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 16448, 0, 1856, 16448, 0, 1856, 16448, 0, 1856, 16448, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + 
Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574435624765356_83_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574435624765356_83_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d892d566 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574435624765356_83_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,166 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if 
((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((112 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((131 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2432, 35073, 0, 2432, 35073, 0, 2432, 35073, 0, 2432, 35073, 0, 2048, 28702, 0, 2048, 28702, 0, 2048, 28702, 0, 2048, 28702, 0, 2048, 28702, 0, 2048, 28702, 0, 2048, 28702, 0, 2432, 35073, 0, 2432, 35073, 0, 2432, 35073, 0, 2432, 35073, 0, 2048, 28702, 0, 2048, 28702, 0, 2048, 28702, 0, 2048, 28702, 0, 2048, 28702, 0, 2048, 28702, 0, 2048, 28702, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574435754076551_84_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574435754076551_84_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bb040f09 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574435754076551_84_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,261 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 4))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 65280, 0, 1088, 65280, 0, 1088, 65280, 0, 1088, 65280, 0, 1088, 65280, 0, 1088, 65280, 0, 1088, 65280, 0, 1088, 65280, 0, 704, 85, 0, 704, 85, 0, 704, 85, 0, 704, 85, 0, 2432, 1, 0, 3072, 1, 0, 3520, 1, 0, 4416, 17476, 0, 4416, 17476, 0, 4416, 17476, 0, 4416, 17476, 0, 4864, 34952, 0, 4864, 34952, 0, 4864, 34952, 0, 4864, 34952, 0, 5760, 17, 0, 5760, 17, 0, 8320, 1024, 0, 11904, 34952, 0, 11904, 34952, 0, 11904, 34952, 0, 11904, 34952, 0, 1088, 65280, 0, 1088, 65280, 0, 1088, 65280, 0, 1088, 
65280, 0, 1088, 65280, 0, 1088, 65280, 0, 1088, 65280, 0, 1088, 65280, 0, 704, 85, 0, 704, 85, 0, 704, 85, 0, 704, 85, 0, 2432, 1, 0, 3072, 1, 0, 3520, 1, 0, 4416, 17476, 0, 4416, 17476, 0, 4416, 17476, 0, 4416, 17476, 0, 4864, 34952, 0, 4864, 34952, 0, 4864, 34952, 0, 4864, 34952, 0, 5760, 17, 0, 5760, 17, 0, 8320, 1024, 0, 11904, 34952, 0, 11904, 34952, 0, 11904, 34952, 0, 11904, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574444837993935_86_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574444837993935_86_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..846bce54 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574444837993935_86_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,183 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* output records of 3 uints each: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free index in _participant_bit; each record reserves 3 via InterlockedAdd */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((13 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 6))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch 
((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + 
Stride: 4 + Data: [848, 65024, 0, 848, 65024, 0, 848, 65024, 0, 848, 65024, 0, 848, 65024, 0, 848, 65024, 0, 848, 65024, 0, 864, 65024, 0, 864, 65024, 0, 864, 65024, 0, 864, 65024, 0, 864, 65024, 0, 864, 65024, 0, 864, 65024, 0, 880, 65024, 0, 880, 65024, 0, 880, 65024, 0, 880, 65024, 0, 880, 65024, 0, 880, 65024, 0, 880, 65024, 0, 848, 65024, 0, 848, 65024, 0, 848, 65024, 0, 848, 65024, 0, 848, 65024, 0, 848, 65024, 0, 848, 65024, 0, 864, 65024, 0, 864, 65024, 0, 864, 65024, 0, 864, 65024, 0, 864, 65024, 0, 864, 65024, 0, 864, 65024, 0, 880, 65024, 0, 880, 65024, 0, 880, 65024, 0, 880, 65024, 0, 880, 65024, 0, 880, 65024, 0, 880, 65024, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574452808692204_88_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574452808692204_88_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f102f667 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574452808692204_88_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,308 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* output records of 3 uints each: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free index in _participant_bit; each record reserves 3 via InterlockedAdd */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (((115 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter3 << 4)); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 5))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5312, 17, 0, 5312, 17, 0, 8592, 64, 0, 8608, 64, 0, 8624, 64, 0, 9024, 34952, 0, 9024, 34952, 0, 9024, 34952, 0, 9024, 34952, 0, 9664, 73, 0, 9664, 73, 0, 9664, 73, 0, 15808, 288, 0, 15808, 288, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5312, 17, 0, 5312, 17, 0, 8592, 64, 0, 8608, 64, 0, 8624, 64, 0, 9024, 34952, 0, 9024, 34952, 0, 9024, 34952, 0, 9024, 34952, 0, 9664, 73, 0, 9664, 73, 0, 9664, 73, 0, 15808, 288, 0, 15808, 288, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574513394560468_93_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574513394560468_93_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0ab20978 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574513394560468_93_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,105 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* output records of 3 uints each: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free index in _participant_bit; each record reserves 3 via InterlockedAdd */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 14)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 32768, 0, 1232, 32768, 0, 3840, 32768, 0, 3856, 32768, 0, 4480, 32768, 0, 1216, 32768, 0, 1232, 32768, 0, 3840, 32768, 0, 3856, 32768, 0, 4480, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574513539010320_94_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574513539010320_94_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4246e4d8 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574513539010320_94_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,339 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* output records of 3 uints each: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free index in _participant_bit; each record reserves 3 via InterlockedAdd */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } else { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 0))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(2)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch 
((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2880, 1040, 0, 2880, 1040, 0, 2896, 1040, 0, 2896, 1040, 0, 4624, 2, 0, 4640, 2, 0, 10240, 18724, 0, 10240, 18724, 0, 10240, 18724, 0, 10240, 18724, 0, 10240, 18724, 0, 
13376, 17, 0, 13376, 17, 0, 16256, 4369, 0, 16256, 4369, 0, 16256, 4369, 0, 16256, 4369, 0, 16576, 17476, 0, 16576, 17476, 0, 16576, 17476, 0, 16576, 17476, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 2880, 1040, 0, 2880, 1040, 0, 2896, 1040, 0, 2896, 1040, 0, 4624, 2, 0, 4640, 2, 0, 10240, 18724, 0, 10240, 18724, 0, 10240, 18724, 0, 10240, 18724, 0, 10240, 18724, 0, 13376, 17, 0, 13376, 17, 0, 16256, 4369, 0, 16256, 4369, 0, 16256, 4369, 0, 16256, 4369, 0, 16576, 17476, 0, 16576, 17476, 0, 16576, 17476, 0, 16576, 17476, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574530854058446_96_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574530854058446_96_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..db28ef3d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574530854058446_96_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 11)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 61440, 0, 1088, 61440, 0, 1088, 61440, 0, 1088, 61440, 0, 1104, 61440, 0, 1104, 61440, 0, 1104, 61440, 0, 1104, 61440, 0, 1120, 61440, 0, 1120, 61440, 0, 1120, 61440, 0, 1120, 61440, 0, 1088, 61440, 0, 1088, 61440, 0, 1088, 61440, 0, 1088, 61440, 0, 1104, 61440, 0, 1104, 61440, 0, 1104, 61440, 0, 1104, 61440, 0, 1120, 61440, 0, 1120, 61440, 0, 1120, 61440, 0, 1120, 61440, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + 
- Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574531008966881_97_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574531008966881_97_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..79a7ad35 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574531008966881_97_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,104 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if 
((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574531141632850_98_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574531141632850_98_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ff78db52 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574531141632850_98_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,195 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 11)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter1 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((150 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 8656, 1156, 0, 8656, 1156, 0, 8656, 1156, 0, 8672, 1156, 0, 8672, 1156, 0, 8672, 1156, 0, 9616, 1, 0, 9620, 1, 0, 9632, 1, 0, 9636, 1, 0, 10192, 21845, 0, 10192, 21845, 0, 10192, 21845, 0, 10192, 21845, 0, 10192, 21845, 0, 10192, 21845, 0, 10192, 21845, 0, 10192, 21845, 0, 10208, 21845, 
0, 10208, 21845, 0, 10208, 21845, 0, 10208, 21845, 0, 10208, 21845, 0, 10208, 21845, 0, 10208, 21845, 0, 10208, 21845, 0, 576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 8656, 1156, 0, 8656, 1156, 0, 8656, 1156, 0, 8672, 1156, 0, 8672, 1156, 0, 8672, 1156, 0, 9616, 1, 0, 9620, 1, 0, 9632, 1, 0, 9636, 1, 0, 10192, 21845, 0, 10192, 21845, 0, 10192, 21845, 0, 10192, 21845, 0, 10192, 21845, 0, 10192, 21845, 0, 10192, 21845, 0, 10192, 21845, 0, 10208, 21845, 0, 10208, 21845, 0, 10208, 21845, 0, 10208, 21845, 0, 10208, 21845, 0, 10208, 21845, 0, 10208, 21845, 0, 10208, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574559790209186_100_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574559790209186_100_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a035afef --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574559790209186_100_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,349 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((217 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((243 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + continue; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() >= 9)) { + 
result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((297 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (311 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + 
Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 6352, 8192, 0, 8000, 34, 0, 8000, 34, 0, 8320, 17476, 0, 8320, 17476, 0, 8320, 17476, 0, 8320, 17476, 0, 8768, 34952, 0, 8768, 34952, 0, 8768, 34952, 0, 8768, 34952, 0, 9664, 17, 0, 9664, 17, 0, 10560, 17476, 0, 10560, 17476, 0, 10560, 17476, 0, 10560, 17476, 0, 11008, 34952, 0, 11008, 34952, 0, 11008, 34952, 0, 11008, 34952, 0, 15104, 4096, 0, 15120, 4096, 0, 15136, 4096, 0, 19024, 8322, 0, 19024, 8322, 0, 19024, 8322, 0, 19904, 1040, 0, 19904, 1040, 0, 20224, 28086, 0, 20224, 28086, 0, 20224, 28086, 0, 20224, 28086, 0, 20224, 28086, 0, 20224, 28086, 0, 20224, 28086, 0, 20224, 28086, 0, 20224, 28086, 0, 20224, 28086, 0, 576, 17, 0, 576, 17, 0, 6352, 8192, 0, 8000, 34, 0, 8000, 34, 0, 8320, 17476, 0, 8320, 17476, 0, 8320, 17476, 0, 8320, 17476, 0, 8768, 34952, 0, 8768, 34952, 0, 8768, 34952, 0, 8768, 34952, 0, 9664, 17, 0, 9664, 17, 0, 10560, 17476, 0, 10560, 17476, 0, 10560, 17476, 0, 10560, 17476, 0, 11008, 34952, 0, 11008, 34952, 0, 11008, 34952, 0, 11008, 34952, 0, 15104, 4096, 0, 15120, 4096, 0, 15136, 4096, 0, 19024, 8322, 0, 19024, 8322, 0, 19024, 8322, 0, 19904, 1040, 0, 19904, 1040, 0, 20224, 28086, 0, 20224, 28086, 0, 20224, 28086, 0, 20224, 28086, 0, 20224, 28086, 0, 20224, 28086, 0, 20224, 28086, 0, 20224, 28086, 0, 20224, 28086, 0, 20224, 28086, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574580788977571_103_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574580788977571_103_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..322ff823 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574580788977571_103_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,197 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((75 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 11)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 3472, 9218, 0, 3472, 9218, 0, 3472, 9218, 0, 3476, 9218, 0, 3476, 9218, 0, 3476, 9218, 0, 4816, 1168, 0, 4816, 1168, 0, 4816, 1168, 0, 4820, 1168, 0, 4820, 1168, 0, 4820, 1168, 0, 5504, 18724, 0, 5504, 18724, 0, 5504, 18724, 0, 5504, 18724, 0, 5504, 18724, 0, 6144, 73, 0, 6144, 73, 0, 6144, 73, 0, 6720, 1040, 0, 6720, 1040, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 3472, 9218, 0, 3472, 9218, 0, 3472, 9218, 0, 3476, 9218, 0, 3476, 9218, 0, 3476, 9218, 0, 4816, 1168, 0, 4816, 1168, 0, 4816, 1168, 
0, 4820, 1168, 0, 4820, 1168, 0, 4820, 1168, 0, 5504, 18724, 0, 5504, 18724, 0, 5504, 18724, 0, 5504, 18724, 0, 5504, 18724, 0, 6144, 73, 0, 6144, 73, 0, 6144, 73, 0, 6720, 1040, 0, 6720, 1040, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574581369708090_104_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574581369708090_104_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..df2c52e3 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574581369708090_104_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,216 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((90 << 6) | (i0 << 4)) | (i2 << 2)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((101 << 6) | (i0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((178 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 5760, 32, 0, 5764, 32, 0, 5768, 32, 0, 5776, 32, 0, 5780, 32, 0, 5784, 32, 0, 5792, 32, 0, 5796, 32, 0, 5800, 32, 0, 6464, 2, 0, 6468, 2, 0, 6472, 2, 0, 6480, 2, 0, 6484, 2, 0, 6488, 2, 0, 6496, 2, 0, 6500, 2, 0, 6504, 2, 0, 7376, 4, 0, 7392, 4, 0, 12288, 34952, 0, 12288, 34952, 0, 12288, 34952, 0, 12288, 34952, 0, 576, 17, 0, 576, 17, 0, 5760, 32, 0, 5764, 32, 0, 5768, 32, 0, 5776, 32, 0, 5780, 32, 0, 5784, 32, 0, 5792, 32, 0, 5796, 32, 0, 5800, 32, 0, 6464, 2, 0, 6468, 2, 0, 6472, 2, 0, 6480, 2, 0, 6484, 2, 0, 6488, 2, 0, 6496, 2, 0, 6500, 2, 0, 6504, 2, 0, 7376, 4, 0, 7392, 4, 0, 12288, 34952, 0, 
12288, 34952, 0, 12288, 34952, 0, 12288, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574604864301946_106_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574604864301946_106_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f3d60e78 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574604864301946_106_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,182 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (i0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = 
(result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [8192, 128, 0, 8208, 128, 0, 8896, 49153, 0, 8896, 49153, 0, 8896, 49153, 0, 10064, 49159, 0, 10064, 49159, 0, 10064, 49159, 0, 10064, 49159, 0, 10064, 49159, 0, 10080, 49159, 0, 10080, 49159, 0, 10080, 49159, 0, 10080, 49159, 0, 10080, 49159, 0, 10896, 49175, 0, 10896, 49175, 0, 10896, 49175, 0, 10896, 49175, 0, 10896, 49175, 0, 10896, 49175, 0, 10912, 49175, 0, 10912, 49175, 0, 10912, 49175, 0, 10912, 49175, 0, 10912, 49175, 0, 10912, 49175, 0, 12416, 43690, 0, 12416, 43690, 0, 12416, 43690, 0, 12416, 43690, 0, 12416, 43690, 0, 12416, 43690, 0, 12416, 43690, 0, 12416, 43690, 0, 8192, 128, 0, 8208, 128, 0, 8896, 49153, 0, 8896, 49153, 0, 8896, 49153, 0, 10064, 49159, 0, 10064, 49159, 0, 10064, 49159, 0, 10064, 49159, 0, 10064, 49159, 0, 10080, 49159, 0, 10080, 49159, 0, 10080, 49159, 0, 10080, 49159, 0, 10080, 49159, 0, 10896, 49175, 0, 10896, 49175, 0, 10896, 49175, 0, 10896, 49175, 0, 10896, 49175, 0, 10896, 49175, 0, 10912, 49175, 0, 10912, 49175, 0, 10912, 49175, 0, 10912, 49175, 0, 10912, 49175, 0, 10912, 49175, 0, 12416, 43690, 0, 12416, 43690, 0, 12416, 43690, 0, 12416, 43690, 0, 12416, 43690, 0, 12416, 43690, 0, 12416, 43690, 0, 12416, 43690, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: 
_wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574613393109807_108_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574613393109807_108_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3affe154 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574613393109807_108_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,318 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((75 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7488, 2, 0, 7504, 2, 0, 7520, 2, 0, 11264, 8360, 0, 11264, 8360, 0, 11264, 8360, 0, 11264, 8360, 0, 15104, 85, 0, 15104, 85, 0, 15104, 85, 0, 15104, 85, 0, 15744, 8, 0, 16640, 2080, 0, 16640, 2080, 0, 7488, 2, 0, 7504, 2, 0, 7520, 2, 0, 11264, 8360, 0, 11264, 8360, 0, 11264, 8360, 0, 11264, 8360, 0, 15104, 85, 0, 15104, 85, 0, 15104, 85, 0, 15104, 85, 0, 15744, 8, 0, 16640, 2080, 0, 16640, 2080, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + 
Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574613961187138_109_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574613961187138_109_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..eeffe4e4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574613961187138_109_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,195 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 10)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 8)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 9)) { + result 
= (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2128, 9216, 0, 2128, 9216, 0, 2144, 9216, 0, 2144, 9216, 0, 2160, 9216, 0, 2160, 9216, 0, 3008, 2048, 0, 4160, 
16384, 0, 5504, 512, 0, 6976, 580, 0, 6976, 580, 0, 6976, 580, 0, 6992, 580, 0, 6992, 580, 0, 6992, 580, 0, 7008, 580, 0, 7008, 580, 0, 7008, 580, 0, 7936, 144, 0, 7936, 144, 0, 7952, 144, 0, 7952, 144, 0, 7968, 144, 0, 7968, 144, 0, 9360, 1064, 0, 9360, 1064, 0, 9360, 1064, 0, 2128, 9216, 0, 2128, 9216, 0, 2144, 9216, 0, 2144, 9216, 0, 2160, 9216, 0, 2160, 9216, 0, 3008, 2048, 0, 4160, 16384, 0, 5504, 512, 0, 6976, 580, 0, 6976, 580, 0, 6976, 580, 0, 6992, 580, 0, 6992, 580, 0, 6992, 580, 0, 7008, 580, 0, 7008, 580, 0, 7008, 580, 0, 7936, 144, 0, 7936, 144, 0, 7952, 144, 0, 7952, 144, 0, 7968, 144, 0, 7968, 144, 0, 9360, 1064, 0, 9360, 1064, 0, 9360, 1064, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574615249957898_110_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574615249957898_110_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7034ee2b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574615249957898_110_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,170 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 15)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((28 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 5))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || 
(WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((87 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((98 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1808, 32768, 0, 1812, 32768, 0, 1816, 32768, 0, 1824, 32768, 0, 1828, 32768, 0, 1832, 32768, 0, 1840, 32768, 0, 1844, 32768, 0, 1848, 32768, 0, 6288, 32768, 0, 6292, 32768, 0, 6296, 32768, 0, 6304, 32768, 0, 6308, 32768, 0, 6312, 32768, 0, 6320, 32768, 0, 6324, 32768, 0, 6328, 32768, 0, 6992, 32768, 0, 7008, 32768, 0, 7024, 32768, 0, 7424, 32768, 0, 10816, 4096, 0, 10432, 17749, 0, 10432, 17749, 0, 10432, 17749, 0, 10432, 17749, 0, 10432, 17749, 0, 10432, 17749, 0, 10432, 17749, 0, 10048, 10, 0, 10048, 10, 0, 9536, 32, 0, 1808, 32768, 0, 1812, 
32768, 0, 1816, 32768, 0, 1824, 32768, 0, 1828, 32768, 0, 1832, 32768, 0, 1840, 32768, 0, 1844, 32768, 0, 1848, 32768, 0, 6288, 32768, 0, 6292, 32768, 0, 6296, 32768, 0, 6304, 32768, 0, 6308, 32768, 0, 6312, 32768, 0, 6320, 32768, 0, 6324, 32768, 0, 6328, 32768, 0, 6992, 32768, 0, 7008, 32768, 0, 7024, 32768, 0, 7424, 32768, 0, 10816, 4096, 0, 10432, 17749, 0, 10432, 17749, 0, 10432, 17749, 0, 10432, 17749, 0, 10432, 17749, 0, 10432, 17749, 0, 10432, 17749, 0, 10048, 10, 0, 10048, 10, 0, 9536, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574615865889525_111_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574615865889525_111_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..212a87dd --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574615865889525_111_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,170 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1792, 64, 0, 6272, 4, 0, 6720, 34952, 0, 6720, 34952, 0, 6720, 34952, 0, 6720, 34952, 0, 576, 17, 0, 576, 17, 0, 1792, 64, 0, 6272, 4, 0, 6720, 34952, 0, 6720, 34952, 0, 6720, 34952, 0, 6720, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574616072413597_112_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574616072413597_112_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1bb7ae80 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574616072413597_112_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,146 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 3200, 2, 0, 3216, 2, 0, 3232, 2, 0, 5184, 18724, 0, 5184, 18724, 0, 5184, 18724, 0, 5184, 18724, 0, 5184, 18724, 0, 5824, 85, 0, 5824, 85, 0, 5824, 85, 0, 
5824, 85, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 3200, 2, 0, 3216, 2, 0, 3232, 2, 0, 5184, 18724, 0, 5184, 18724, 0, 5184, 18724, 0, 5184, 18724, 0, 5184, 18724, 0, 5824, 85, 0, 5824, 85, 0, 5824, 85, 0, 5824, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574616408368048_113_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574616408368048_113_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a26292b0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574616408368048_113_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,121 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 3840, 18724, 0, 3840, 18724, 0, 3840, 18724, 0, 3840, 18724, 0, 3840, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 3840, 18724, 0, 3840, 18724, 0, 3840, 18724, 0, 3840, 18724, 0, 
3840, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574616530420775_114_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574616530420775_114_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3c6bb56f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574616530420775_114_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,241 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 13)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (i0 << 
4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } else { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((218 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2244, 57344, 0, 2244, 57344, 0, 2244, 57344, 0, 2248, 57344, 0, 2248, 57344, 0, 2248, 57344, 0, 2260, 57344, 0, 2260, 
57344, 0, 2260, 57344, 0, 2264, 57344, 0, 2264, 57344, 0, 2264, 57344, 0, 3456, 4096, 0, 3904, 7936, 0, 3904, 7936, 0, 3904, 7936, 0, 3904, 7936, 0, 3904, 7936, 0, 11344, 3, 0, 11344, 3, 0, 11360, 3, 0, 11360, 3, 0, 14400, 1, 0, 2244, 57344, 0, 2244, 57344, 0, 2244, 57344, 0, 2248, 57344, 0, 2248, 57344, 0, 2248, 57344, 0, 2260, 57344, 0, 2260, 57344, 0, 2260, 57344, 0, 2264, 57344, 0, 2264, 57344, 0, 2264, 57344, 0, 3456, 4096, 0, 3904, 7936, 0, 3904, 7936, 0, 3904, 7936, 0, 3904, 7936, 0, 3904, 7936, 0, 11344, 3, 0, 11344, 3, 0, 11360, 3, 0, 11360, 3, 0, 14400, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574632093014565_117_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574632093014565_117_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..34dd4b06 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574632093014565_117_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,253 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() 
== 14)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 15))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | 
(counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((251 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((292 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((306 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((317 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 228 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2176, 4, 0, 2192, 4, 0, 2208, 4, 0, 3328, 4, 0, 3344, 4, 0, 3360, 4, 0, 6400, 57347, 0, 6400, 57347, 0, 6400, 57347, 0, 6400, 57347, 0, 6400, 57347, 0, 13056, 512, 0, 13632, 1040, 0, 13632, 1040, 0, 14672, 10402, 0, 14672, 10402, 0, 14672, 10402, 0, 14672, 10402, 0, 14672, 10402, 0, 14688, 10402, 0, 14688, 10402, 0, 14688, 10402, 0, 14688, 10402, 0, 14688, 10402, 0, 14704, 10402, 0, 14704, 10402, 0, 14704, 10402, 0, 14704, 10402, 0, 14704, 10402, 0, 18704, 16386, 0, 18704, 16386, 0, 18720, 16386, 0, 18720, 16386, 0, 18736, 16386, 0, 18736, 16386, 0, 20304, 16384, 0, 20320, 16384, 0, 20336, 16384, 0, 2176, 4, 0, 2192, 4, 0, 2208, 4, 0, 3328, 4, 0, 3344, 4, 0, 3360, 4, 0, 6400, 57347, 0, 6400, 57347, 0, 6400, 57347, 0, 6400, 57347, 0, 6400, 57347, 0, 13056, 512, 0, 13632, 1040, 0, 13632, 1040, 0, 14672, 10402, 0, 14672, 10402, 0, 14672, 10402, 0, 14672, 10402, 0, 14672, 10402, 0, 14688, 10402, 0, 14688, 10402, 0, 14688, 10402, 0, 14688, 10402, 0, 14688, 10402, 0, 14704, 10402, 0, 14704, 10402, 0, 14704, 10402, 0, 14704, 10402, 0, 14704, 10402, 0, 18704, 16386, 0, 18704, 16386, 0, 18720, 16386, 0, 18720, 16386, 0, 18736, 16386, 0, 18736, 16386, 0, 20304, 16384, 0, 20320, 16384, 0, 20336, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574636830903892_118_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574636830903892_118_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f5a31e07 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574636830903892_118_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,268 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) 
{ + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((149 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((156 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 168 + - Name: expected_bit_patterns 
+ Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2832, 68, 0, 2832, 68, 0, 2848, 68, 0, 2848, 68, 0, 4736, 34952, 0, 4736, 34952, 0, 4736, 34952, 0, 4736, 34952, 0, 6272, 1, 0, 6848, 1040, 0, 6848, 1040, 0, 8656, 40960, 0, 8656, 40960, 0, 8672, 40960, 0, 8672, 40960, 0, 9556, 512, 0, 9560, 512, 0, 9572, 512, 0, 9576, 512, 0, 10576, 43008, 0, 10576, 43008, 0, 10576, 43008, 0, 10592, 43008, 0, 10592, 43008, 0, 10592, 43008, 0, 12224, 8192, 0, 576, 17, 0, 576, 17, 0, 2832, 68, 0, 2832, 68, 0, 2848, 68, 0, 2848, 68, 0, 4736, 34952, 0, 4736, 34952, 0, 4736, 34952, 0, 4736, 34952, 0, 6272, 1, 0, 6848, 1040, 0, 6848, 1040, 0, 8656, 40960, 0, 8656, 40960, 0, 8672, 40960, 0, 8672, 40960, 0, 9556, 512, 0, 9560, 512, 0, 9572, 512, 0, 9576, 512, 0, 10576, 43008, 0, 10576, 43008, 0, 10576, 43008, 0, 10592, 43008, 0, 10592, 43008, 0, 10592, 43008, 0, 12224, 8192, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574642785074864_120_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574642785074864_120_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3306cb6a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574642785074864_120_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,258 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 
= (counter0 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) 
|| (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((181 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((235 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - 
Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 49153, 0, 1088, 49153, 0, 1088, 49153, 0, 832, 254, 0, 832, 254, 0, 832, 254, 0, 832, 254, 0, 832, 254, 0, 832, 254, 0, 832, 254, 0, 3136, 8196, 0, 3136, 8196, 0, 11152, 4, 0, 15056, 8192, 0, 15936, 32, 0, 1088, 49153, 0, 1088, 49153, 0, 1088, 49153, 0, 832, 254, 0, 832, 254, 0, 832, 254, 0, 832, 254, 0, 832, 254, 0, 832, 254, 0, 832, 254, 0, 3136, 8196, 0, 3136, 8196, 0, 11152, 4, 0, 15056, 8192, 0, 15936, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574672741606688_124_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574672741606688_124_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..29ab6e9e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574672741606688_124_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,124 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1728, 33280, 0, 1728, 33280, 0, 1744, 33280, 0, 1744, 33280, 0, 2624, 2080, 0, 2624, 2080, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1728, 33280, 0, 1728, 33280, 0, 1744, 33280, 0, 1744, 33280, 0, 2624, 2080, 0, 2624, 2080, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574672922329574_125_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574672922329574_125_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4832fc24 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574672922329574_125_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,217 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 
20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1728, 1, 0, 4864, 64, 0, 5312, 32776, 0, 5312, 32776, 0, 6016, 512, 0, 6592, 5201, 0, 6592, 5201, 0, 6592, 5201, 0, 6592, 5201, 0, 6592, 5201, 0, 6912, 18724, 0, 6912, 18724, 0, 6912, 18724, 0, 6912, 18724, 0, 6912, 18724, 0, 1728, 1, 0, 4864, 64, 0, 5312, 32776, 0, 5312, 32776, 0, 6016, 512, 0, 6592, 5201, 0, 6592, 5201, 0, 6592, 5201, 0, 6592, 5201, 0, 6592, 5201, 0, 6912, 18724, 0, 6912, 18724, 0, 6912, 18724, 0, 6912, 18724, 0, 6912, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] 
+Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574673149711577_126_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574673149711577_126_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f25b0ae4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574673149711577_126_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,292 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 
14)) || (WaveGetLaneIndex() == 7))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } else { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while 
((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((139 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((224 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((231 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((290 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((309 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 378 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [12880, 1, 0, 12896, 1, 0, 13456, 4352, 0, 13456, 4352, 0, 13472, 
4352, 0, 13472, 4352, 0, 14804, 4096, 0, 14808, 4096, 0, 14820, 4096, 0, 14824, 4096, 0, 16848, 4096, 0, 16864, 4096, 0, 17408, 4369, 0, 17408, 4369, 0, 17408, 4369, 0, 17408, 4369, 0, 5376, 43520, 0, 5376, 43520, 0, 5376, 43520, 0, 5376, 43520, 0, 5392, 43520, 0, 5392, 43520, 0, 5392, 43520, 0, 5392, 43520, 0, 7232, 1, 0, 7248, 1, 0, 7264, 1, 0, 8900, 33, 0, 8900, 33, 0, 8904, 33, 0, 8904, 33, 0, 8908, 33, 0, 8908, 33, 0, 8916, 33, 0, 8916, 33, 0, 8920, 33, 0, 8920, 33, 0, 8924, 33, 0, 8924, 33, 0, 8932, 33, 0, 8932, 33, 0, 8936, 33, 0, 8936, 33, 0, 8940, 33, 0, 8940, 33, 0, 12048, 1, 0, 12064, 1, 0, 576, 61440, 0, 576, 61440, 0, 576, 61440, 0, 576, 61440, 0, 1664, 21760, 0, 1664, 21760, 0, 1664, 21760, 0, 1664, 21760, 0, 1680, 21760, 0, 1680, 21760, 0, 1680, 21760, 0, 1680, 21760, 0, 20224, 34952, 0, 20224, 34952, 0, 20224, 34952, 0, 20224, 34952, 0, 12880, 1, 0, 12896, 1, 0, 13456, 4352, 0, 13456, 4352, 0, 13472, 4352, 0, 13472, 4352, 0, 14804, 4096, 0, 14808, 4096, 0, 14820, 4096, 0, 14824, 4096, 0, 16848, 4096, 0, 16864, 4096, 0, 17408, 4369, 0, 17408, 4369, 0, 17408, 4369, 0, 17408, 4369, 0, 5376, 43520, 0, 5376, 43520, 0, 5376, 43520, 0, 5376, 43520, 0, 5392, 43520, 0, 5392, 43520, 0, 5392, 43520, 0, 5392, 43520, 0, 7232, 1, 0, 7248, 1, 0, 7264, 1, 0, 8900, 33, 0, 8900, 33, 0, 8904, 33, 0, 8904, 33, 0, 8908, 33, 0, 8908, 33, 0, 8916, 33, 0, 8916, 33, 0, 8920, 33, 0, 8920, 33, 0, 8924, 33, 0, 8924, 33, 0, 8932, 33, 0, 8932, 33, 0, 8936, 33, 0, 8936, 33, 0, 8940, 33, 0, 8940, 33, 0, 12048, 1, 0, 12064, 1, 0, 576, 61440, 0, 576, 61440, 0, 576, 61440, 0, 576, 61440, 0, 1664, 21760, 0, 1664, 21760, 0, 1664, 21760, 0, 1664, 21760, 0, 1680, 21760, 0, 1680, 21760, 0, 1680, 21760, 0, 1680, 21760, 0, 20224, 34952, 0, 20224, 34952, 0, 20224, 34952, 0, 20224, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574685298121135_127_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574685298121135_127_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d80aece2 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574685298121135_127_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,372 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 10)) { + result = 
(result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 15)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 5)) { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((296 << 6) | (counter5 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (303 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 390 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 21845, 0, 1792, 21845, 0, 1792, 21845, 0, 1792, 21845, 0, 1792, 21845, 0, 1792, 21845, 0, 1792, 21845, 0, 1792, 21845, 0, 1808, 21845, 0, 1808, 21845, 0, 1808, 21845, 0, 1808, 21845, 0, 1808, 21845, 0, 1808, 21845, 0, 1808, 21845, 0, 1808, 21845, 0, 1824, 21845, 0, 1824, 21845, 0, 1824, 21845, 0, 1824, 21845, 0, 1824, 21845, 0, 1824, 21845, 0, 1824, 21845, 0, 1824, 21845, 0, 2704, 43008, 0, 2704, 43008, 0, 2704, 43008, 0, 2720, 43008, 0, 2720, 43008, 0, 2720, 43008, 0, 4048, 40960, 0, 4048, 40960, 0, 4064, 40960, 0, 4064, 40960, 0, 9024, 1, 0, 9664, 1, 0, 13248, 256, 0, 13824, 4369, 0, 13824, 4369, 0, 13824, 4369, 0, 13824, 4369, 0, 15424, 1, 0, 15440, 1, 0, 16640, 1040, 0, 16640, 1040, 0, 17792, 16388, 0, 17792, 16388, 0, 18960, 16384, 0, 18976, 16384, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 1792, 21845, 0, 1792, 21845, 0, 1792, 21845, 0, 1792, 21845, 0, 1792, 21845, 0, 1792, 21845, 0, 1792, 21845, 0, 1792, 21845, 0, 1808, 21845, 0, 1808, 21845, 0, 1808, 21845, 0, 1808, 21845, 
0, 1808, 21845, 0, 1808, 21845, 0, 1808, 21845, 0, 1808, 21845, 0, 1824, 21845, 0, 1824, 21845, 0, 1824, 21845, 0, 1824, 21845, 0, 1824, 21845, 0, 1824, 21845, 0, 1824, 21845, 0, 1824, 21845, 0, 2704, 43008, 0, 2704, 43008, 0, 2704, 43008, 0, 2720, 43008, 0, 2720, 43008, 0, 2720, 43008, 0, 4048, 40960, 0, 4048, 40960, 0, 4064, 40960, 0, 4064, 40960, 0, 9024, 1, 0, 9664, 1, 0, 13248, 256, 0, 13824, 4369, 0, 13824, 4369, 0, 13824, 4369, 0, 13824, 4369, 0, 15424, 1, 0, 15440, 1, 0, 16640, 1040, 0, 16640, 1040, 0, 17792, 16388, 0, 17792, 16388, 0, 18960, 16384, 0, 18976, 16384, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0, 19392, 65535, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574690291026149_128_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574690291026149_128_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f42baf5e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574690291026149_128_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,178 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((96 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((124 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((135 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((148 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 774 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2304, 1058, 0, 2304, 1058, 0, 2304, 1058, 0, 2048, 29, 0, 2048, 29, 0, 2048, 29, 0, 2048, 29, 0, 1664, 61440, 0, 1664, 61440, 0, 1664, 61440, 0, 1664, 61440, 0, 2944, 85, 0, 2944, 85, 0, 2944, 85, 0, 2944, 85, 0, 4816, 21845, 0, 4816, 21845, 0, 4816, 21845, 0, 4816, 21845, 0, 4816, 21845, 0, 4816, 21845, 0, 4816, 21845, 0, 4816, 21845, 0, 4832, 21845, 0, 4832, 21845, 0, 4832, 21845, 0, 4832, 21845, 0, 4832, 21845, 0, 4832, 21845, 0, 4832, 21845, 0, 4832, 21845, 0, 4848, 21845, 0, 4848, 21845, 0, 4848, 21845, 0, 4848, 21845, 0, 4848, 21845, 0, 4848, 21845, 0, 4848, 21845, 0, 4848, 21845, 0, 6160, 63489, 0, 6160, 63489, 0, 6160, 63489, 0, 
6160, 63489, 0, 6160, 63489, 0, 6160, 63489, 0, 6164, 63489, 0, 6164, 63489, 0, 6164, 63489, 0, 6164, 63489, 0, 6164, 63489, 0, 6164, 63489, 0, 6168, 63489, 0, 6168, 63489, 0, 6168, 63489, 0, 6168, 63489, 0, 6168, 63489, 0, 6168, 63489, 0, 6176, 63489, 0, 6176, 63489, 0, 6176, 63489, 0, 6176, 63489, 0, 6176, 63489, 0, 6176, 63489, 0, 6180, 63489, 0, 6180, 63489, 0, 6180, 63489, 0, 6180, 63489, 0, 6180, 63489, 0, 6180, 63489, 0, 6184, 63489, 0, 6184, 63489, 0, 6184, 63489, 0, 6184, 63489, 0, 6184, 63489, 0, 6184, 63489, 0, 6192, 63489, 0, 6192, 63489, 0, 6192, 63489, 0, 6192, 63489, 0, 6192, 63489, 0, 6192, 63489, 0, 6196, 63489, 0, 6196, 63489, 0, 6196, 63489, 0, 6196, 63489, 0, 6196, 63489, 0, 6196, 63489, 0, 6200, 63489, 0, 6200, 63489, 0, 6200, 63489, 0, 6200, 63489, 0, 6200, 63489, 0, 6200, 63489, 0, 7952, 8, 0, 7956, 8, 0, 7960, 8, 0, 7968, 8, 0, 7972, 8, 0, 7976, 8, 0, 7984, 8, 0, 7988, 8, 0, 7992, 8, 0, 9488, 49153, 0, 9488, 49153, 0, 9488, 49153, 0, 9492, 49153, 0, 9492, 49153, 0, 9492, 49153, 0, 9496, 49153, 0, 9496, 49153, 0, 9496, 49153, 0, 9504, 49153, 0, 9504, 49153, 0, 9504, 49153, 0, 9508, 49153, 0, 9508, 49153, 0, 9508, 49153, 0, 9512, 49153, 0, 9512, 49153, 0, 9512, 49153, 0, 9520, 49153, 0, 9520, 49153, 0, 9520, 49153, 0, 9524, 49153, 0, 9524, 49153, 0, 9524, 49153, 0, 9528, 49153, 0, 9528, 49153, 0, 9528, 49153, 0, 2304, 1058, 0, 2304, 1058, 0, 2304, 1058, 0, 2048, 29, 0, 2048, 29, 0, 2048, 29, 0, 2048, 29, 0, 1664, 61440, 0, 1664, 61440, 0, 1664, 61440, 0, 1664, 61440, 0, 2944, 85, 0, 2944, 85, 0, 2944, 85, 0, 2944, 85, 0, 4816, 21845, 0, 4816, 21845, 0, 4816, 21845, 0, 4816, 21845, 0, 4816, 21845, 0, 4816, 21845, 0, 4816, 21845, 0, 4816, 21845, 0, 4832, 21845, 0, 4832, 21845, 0, 4832, 21845, 0, 4832, 21845, 0, 4832, 21845, 0, 4832, 21845, 0, 4832, 21845, 0, 4832, 21845, 0, 4848, 21845, 0, 4848, 21845, 0, 4848, 21845, 0, 4848, 21845, 0, 4848, 21845, 0, 4848, 21845, 0, 4848, 21845, 0, 4848, 21845, 0, 6160, 63489, 0, 6160, 63489, 0, 6160, 63489, 
0, 6160, 63489, 0, 6160, 63489, 0, 6160, 63489, 0, 6164, 63489, 0, 6164, 63489, 0, 6164, 63489, 0, 6164, 63489, 0, 6164, 63489, 0, 6164, 63489, 0, 6168, 63489, 0, 6168, 63489, 0, 6168, 63489, 0, 6168, 63489, 0, 6168, 63489, 0, 6168, 63489, 0, 6176, 63489, 0, 6176, 63489, 0, 6176, 63489, 0, 6176, 63489, 0, 6176, 63489, 0, 6176, 63489, 0, 6180, 63489, 0, 6180, 63489, 0, 6180, 63489, 0, 6180, 63489, 0, 6180, 63489, 0, 6180, 63489, 0, 6184, 63489, 0, 6184, 63489, 0, 6184, 63489, 0, 6184, 63489, 0, 6184, 63489, 0, 6184, 63489, 0, 6192, 63489, 0, 6192, 63489, 0, 6192, 63489, 0, 6192, 63489, 0, 6192, 63489, 0, 6192, 63489, 0, 6196, 63489, 0, 6196, 63489, 0, 6196, 63489, 0, 6196, 63489, 0, 6196, 63489, 0, 6196, 63489, 0, 6200, 63489, 0, 6200, 63489, 0, 6200, 63489, 0, 6200, 63489, 0, 6200, 63489, 0, 6200, 63489, 0, 7952, 8, 0, 7956, 8, 0, 7960, 8, 0, 7968, 8, 0, 7972, 8, 0, 7976, 8, 0, 7984, 8, 0, 7988, 8, 0, 7992, 8, 0, 9488, 49153, 0, 9488, 49153, 0, 9488, 49153, 0, 9492, 49153, 0, 9492, 49153, 0, 9492, 49153, 0, 9496, 49153, 0, 9496, 49153, 0, 9496, 49153, 0, 9504, 49153, 0, 9504, 49153, 0, 9504, 49153, 0, 9508, 49153, 0, 9508, 49153, 0, 9508, 49153, 0, 9512, 49153, 0, 9512, 49153, 0, 9512, 49153, 0, 9520, 49153, 0, 9520, 49153, 0, 9520, 49153, 0, 9524, 49153, 0, 9524, 49153, 0, 9524, 49153, 0, 9528, 49153, 0, 9528, 49153, 0, 9528, 49153, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574728578998174_129_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574728578998174_129_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..33c81651 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574728578998174_129_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); /* Tracking protocol: a lane that runs a wave op reserves three uints with InterlockedAdd on _wave_op_index[0], then stores a tag (op id << 6) followed by ballot.x and ballot.y of the lanes active at that point. */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* The expected data in pipeline.yaml lists the triple (1856, 1680, 0) eight times; presumably 4 participating lanes x 2 waves of 16 lanes - NOTE(review): confirm the wave size. */ + uint result = 0; + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 7))) { /* lanes 4, 7, 9 and 10: tag 29 << 6 == 1856, expected ballot.x 1680 (bits 4, 7, 9, 10) */ + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 4)) { /* never true here: lane 4 already took the branch above, so tag 23 << 6 does not appear in the expected data */ + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 1680, 0, 1856, 1680, 0, 1856, 1680, 0, 1856, 1680, 0, 1856, 1680, 0, 1856, 1680, 0, 1856, 1680, 0, 1856, 1680, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: 
[0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574728675367467_130_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574728675367467_130_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d202e013 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574728675367467_130_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,219 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); /* Tracking protocol: a lane that runs a wave op reserves three uints with InterlockedAdd on _wave_op_index[0], then stores a tag (op id << 6, OR'ed with loop counters shifted by 4 and 2 inside loops) followed by ballot.x and ballot.y. Lane sets named in the comments below assume lane indices 0-15 - NOTE(review): confirm the wave size. */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { /* case-0 lanes below 8 are 0 and 4: expected triple (576, 17, 0) */ + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; /* last statement of the loop body, so this continue skips nothing */ + } + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { /* x % 3 is always 0, 1 or 2, so this default is unreachable; tag 115 << 6 is absent from the expected data */ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((160 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 2)) { + break; /* leaves the loop after the second pass, so counter1 == 3 is never recorded (expected data holds only 9360 and 9376 for tag 146) */ + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + 
WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1728, 8192, 0, 4672, 8192, 0, 4688, 8192, 0, 4704, 8192, 0, 5568, 2, 0, 7104, 32, 0, 8192, 2, 0, 9360, 17408, 0, 9360, 17408, 0, 9376, 17408, 0, 9376, 17408, 0, 10260, 16384, 0, 10264, 16384, 0, 10276, 16384, 0, 10280, 16384, 0, 10880, 34952, 0, 10880, 34952, 0, 10880, 34952, 0, 10880, 34952, 0, 576, 17, 0, 576, 17, 0, 1728, 8192, 0, 4672, 8192, 0, 4688, 8192, 0, 4704, 8192, 0, 5568, 2, 0, 7104, 32, 0, 8192, 2, 0, 9360, 17408, 0, 9360, 17408, 0, 9376, 17408, 0, 9376, 17408, 0, 10260, 16384, 0, 10264, 16384, 0, 10276, 16384, 0, 10280, 16384, 0, 10880, 34952, 0, 10880, 34952, 0, 10880, 34952, 0, 10880, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574733974261682_132_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574733974261682_132_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d5a20885 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574733974261682_132_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,144 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); /* Tracking protocol: a lane that runs a wave op reserves three uints with InterlockedAdd on _wave_op_index[0], then stores a tag (op id << 6, OR'ed with the loop counter << 4 inside the loop) followed by ballot.x and ballot.y. */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 13)) { /* 13 % 3 == 1, so no lane in case 0 satisfies this; tag 14 is never recorded */ + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 6)) { /* contradicts the enclosing lane == 0 test, so tag 61 is never recorded */ + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() >= 8)) { /* contradicts the enclosing lane < 7 test, so tag 82 is never recorded */ + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 4097, 0, 2112, 4097, 0, 3264, 
8194, 0, 3264, 8194, 0, 4608, 8194, 0, 4608, 8194, 0, 7184, 4, 0, 7200, 4, 0, 7216, 4, 0, 2112, 4097, 0, 2112, 4097, 0, 3264, 8194, 0, 3264, 8194, 0, 4608, 8194, 0, 4608, 8194, 0, 7184, 4, 0, 7200, 4, 0, 7216, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574734203212138_133_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574734203212138_133_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dee76325 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574734203212138_133_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,366 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); /* Tracking protocol: a lane that runs a wave op reserves three uints with InterlockedAdd on _wave_op_index[0], then stores a tag (op id << 6, OR'ed with loop counters shifted by 4 and 2 inside loops) followed by ballot.x and ballot.y. */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { /* case 0 has no break and falls through to here; the expected mask 5201 for tag 18 covers case-0 lanes 0, 6 and 12 as well as case-1 lanes 4 and 10 */ + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { /* case 1 has no break either, so lanes from all three cases execute this body */ + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { /* contradicts the enclosing even-lane test, so tag 32 is never recorded */ + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { /* x % 3 is always 0, 1 or 2, so this default is unreachable */ + result = (result + WaveActiveSum(99)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 14)) { /* 14 % 4 == 2, so no case-0 lane gets here; tags 87, 101 and 110 are never recorded */ + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 13)) { + if ((WaveGetLaneIndex() < 2)) { /* contradicts the enclosing lane >= 13 test, so tags 244, 258, 265 and 272 are never recorded */ + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((244 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((258 << 6) | (i2 << 
4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((265 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 174 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 2944, 21845, 0, 2944, 21845, 0, 2944, 21845, 0, 2944, 21845, 0, 2944, 21845, 0, 2944, 21845, 0, 2944, 21845, 0, 2944, 21845, 0, 3584, 8, 0, 4480, 2080, 0, 4480, 2080, 0, 11456, 8194, 0, 11456, 8194, 0, 13824, 17476, 0, 13824, 17476, 0, 13824, 17476, 0, 13824, 17476, 0, 14272, 34952, 0, 14272, 34952, 0, 14272, 34952, 0, 14272, 34952, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 2944, 21845, 0, 2944, 21845, 0, 2944, 21845, 0, 2944, 21845, 0, 2944, 21845, 0, 2944, 21845, 0, 2944, 21845, 0, 2944, 21845, 0, 3584, 8, 0, 4480, 2080, 0, 4480, 2080, 0, 11456, 8194, 0, 11456, 8194, 0, 13824, 17476, 0, 13824, 17476, 0, 13824, 17476, 0, 13824, 17476, 0, 14272, 34952, 0, 14272, 34952, 0, 14272, 34952, 0, 14272, 
34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574735084457702_134_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574735084457702_134_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..93e1937c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574735084457702_134_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,374 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((226 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 9)) { + 
result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((297 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((306 << 6) | (i2 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((337 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((348 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (352 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - 
Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 498 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2560, 2050, 0, 2560, 2050, 0, 2576, 2050, 0, 2576, 2050, 0, 2592, 2050, 0, 2592, 2050, 0, 6016, 43690, 0, 6016, 43690, 0, 6016, 43690, 0, 6016, 43690, 0, 6016, 43690, 0, 6016, 43690, 0, 6016, 43690, 0, 6016, 43690, 0, 6032, 43690, 0, 6032, 43690, 0, 6032, 43690, 0, 6032, 43690, 0, 6032, 43690, 0, 6032, 43690, 0, 6032, 43690, 0, 6032, 43690, 0, 6048, 43690, 0, 6048, 43690, 0, 6048, 43690, 0, 6048, 43690, 0, 6048, 43690, 0, 6048, 43690, 0, 6048, 43690, 0, 6048, 43690, 0, 8000, 34952, 0, 8000, 34952, 0, 8000, 34952, 0, 8000, 34952, 0, 8016, 34952, 0, 8016, 34952, 0, 8016, 34952, 0, 8016, 34952, 0, 8032, 34952, 0, 8032, 34952, 0, 8032, 34952, 0, 8032, 34952, 0, 9792, 34, 0, 9792, 34, 0, 9808, 34, 0, 9808, 34, 0, 9824, 34, 0, 9824, 34, 0, 10240, 21760, 0, 10240, 21760, 0, 10240, 21760, 0, 10240, 21760, 0, 10880, 17, 0, 10880, 17, 0, 12048, 4097, 0, 12048, 4097, 0, 12064, 4097, 0, 12064, 4097, 0, 16592, 4097, 0, 16592, 4097, 0, 16608, 4097, 0, 16608, 4097, 0, 16896, 21845, 0, 16896, 21845, 0, 16896, 21845, 0, 16896, 21845, 0, 16896, 21845, 0, 16896, 21845, 0, 16896, 21845, 0, 16896, 21845, 0, 20480, 85, 0, 20480, 85, 0, 20480, 85, 0, 20480, 85, 0, 22288, 32778, 0, 22288, 32778, 0, 22288, 32778, 0, 22304, 32778, 0, 22304, 32778, 0, 22304, 32778, 0, 22320, 32778, 0, 22320, 32778, 0, 22320, 32778, 0, 2560, 2050, 0, 2560, 2050, 0, 2576, 2050, 0, 2576, 2050, 0, 2592, 2050, 0, 2592, 2050, 0, 6016, 43690, 0, 6016, 43690, 0, 6016, 43690, 0, 6016, 43690, 0, 6016, 43690, 0, 6016, 43690, 0, 6016, 43690, 0, 6016, 43690, 0, 6032, 43690, 0, 6032, 43690, 0, 6032, 43690, 0, 6032, 43690, 0, 6032, 43690, 0, 6032, 43690, 0, 6032, 43690, 0, 6032, 43690, 0, 6048, 43690, 0, 6048, 43690, 0, 6048, 43690, 0, 6048, 43690, 0, 6048, 43690, 0, 6048, 43690, 
0, 6048, 43690, 0, 6048, 43690, 0, 8000, 34952, 0, 8000, 34952, 0, 8000, 34952, 0, 8000, 34952, 0, 8016, 34952, 0, 8016, 34952, 0, 8016, 34952, 0, 8016, 34952, 0, 8032, 34952, 0, 8032, 34952, 0, 8032, 34952, 0, 8032, 34952, 0, 9792, 34, 0, 9792, 34, 0, 9808, 34, 0, 9808, 34, 0, 9824, 34, 0, 9824, 34, 0, 10240, 21760, 0, 10240, 21760, 0, 10240, 21760, 0, 10240, 21760, 0, 10880, 17, 0, 10880, 17, 0, 12048, 4097, 0, 12048, 4097, 0, 12064, 4097, 0, 12064, 4097, 0, 16592, 4097, 0, 16592, 4097, 0, 16608, 4097, 0, 16608, 4097, 0, 16896, 21845, 0, 16896, 21845, 0, 16896, 21845, 0, 16896, 21845, 0, 16896, 21845, 0, 16896, 21845, 0, 16896, 21845, 0, 16896, 21845, 0, 20480, 85, 0, 20480, 85, 0, 20480, 85, 0, 20480, 85, 0, 22288, 32778, 0, 22288, 32778, 0, 22288, 32778, 0, 22304, 32778, 0, 22304, 32778, 0, 22304, 32778, 0, 22320, 32778, 0, 22320, 32778, 0, 22320, 32778, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574738872461806_135_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574738872461806_135_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4d74a1f8 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574738872461806_135_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,294 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 9)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 3)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 
2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(7)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((249 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { 
+ result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 240 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1360, 21504, 0, 1360, 21504, 0, 1360, 21504, 0, 1376, 21504, 0, 1376, 21504, 0, 1376, 21504, 0, 1392, 21504, 0, 1392, 21504, 0, 1392, 21504, 0, 6208, 2, 0, 6224, 2, 0, 9216, 8192, 0, 10384, 2, 0, 10400, 2, 0, 11472, 8192, 0, 11488, 8192, 0, 12176, 8194, 0, 12176, 8194, 0, 12192, 8194, 0, 12192, 8194, 0, 12864, 2, 0, 13312, 1, 0, 13952, 73, 0, 13952, 73, 0, 13952, 73, 0, 14528, 1040, 0, 14528, 1040, 0, 15488, 18432, 0, 15488, 18432, 0, 15504, 18432, 0, 15504, 18432, 0, 15520, 18432, 0, 15520, 18432, 0, 15936, 36, 0, 15936, 36, 0, 15952, 36, 0, 15952, 36, 0, 15968, 36, 0, 15968, 36, 0, 16640, 8, 0, 1360, 21504, 0, 1360, 21504, 0, 1360, 21504, 0, 1376, 21504, 0, 1376, 21504, 0, 1376, 21504, 0, 1392, 21504, 0, 1392, 21504, 0, 1392, 21504, 0, 6208, 2, 0, 6224, 2, 0, 9216, 8192, 0, 10384, 2, 0, 10400, 2, 0, 11472, 8192, 0, 11488, 8192, 0, 12176, 8194, 0, 12176, 8194, 0, 12192, 8194, 0, 12192, 8194, 0, 12864, 2, 0, 13312, 1, 0, 13952, 73, 0, 13952, 73, 0, 13952, 73, 0, 14528, 1040, 0, 14528, 1040, 0, 15488, 18432, 0, 15488, 18432, 0, 15504, 18432, 0, 15504, 18432, 0, 15520, 18432, 0, 15520, 18432, 0, 
15936, 36, 0, 15936, 36, 0, 15952, 36, 0, 15952, 36, 0, 15968, 36, 0, 15968, 36, 0, 16640, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574741638541470_136_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574741638541470_136_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9bdb9543 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574741638541470_136_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,147 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 73, 0, 768, 73, 0, 768, 73, 0, 2240, 1040, 0, 2240, 1040, 0, 768, 73, 0, 768, 73, 0, 768, 73, 0, 2240, 1040, 0, 2240, 1040, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574741807269953_137_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574741807269953_137_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1b1821e4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574741807269953_137_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,231 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() 
>= 14)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((89 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((122 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((163 << 6) | (i1 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((178 << 6) | (i1 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 456 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2304, 16384, 0, 2320, 16384, 0, 2336, 16384, 0, 3264, 
320, 0, 3264, 320, 0, 3280, 320, 0, 3280, 320, 0, 3296, 320, 0, 3296, 320, 0, 5696, 43690, 0, 5696, 43690, 0, 5696, 43690, 0, 5696, 43690, 0, 5696, 43690, 0, 5696, 43690, 0, 5696, 43690, 0, 5696, 43690, 0, 5712, 43690, 0, 5712, 43690, 0, 5712, 43690, 0, 5712, 43690, 0, 5712, 43690, 0, 5712, 43690, 0, 5712, 43690, 0, 5712, 43690, 0, 6336, 7, 0, 6336, 7, 0, 6336, 7, 0, 6352, 7, 0, 6352, 7, 0, 6352, 7, 0, 9024, 1056, 0, 9024, 1056, 0, 9040, 1056, 0, 9040, 1056, 0, 10436, 16896, 0, 10436, 16896, 0, 10440, 16896, 0, 10440, 16896, 0, 10452, 16896, 0, 10452, 16896, 0, 10456, 16896, 0, 10456, 16896, 0, 11396, 1280, 0, 11396, 1280, 0, 11400, 1280, 0, 11400, 1280, 0, 11412, 1280, 0, 11412, 1280, 0, 11416, 1280, 0, 11416, 1280, 0, 11968, 43690, 0, 11968, 43690, 0, 11968, 43690, 0, 11968, 43690, 0, 11968, 43690, 0, 11968, 43690, 0, 11968, 43690, 0, 11968, 43690, 0, 11984, 43690, 0, 11984, 43690, 0, 11984, 43690, 0, 11984, 43690, 0, 11984, 43690, 0, 11984, 43690, 0, 11984, 43690, 0, 11984, 43690, 0, 13120, 43690, 0, 13120, 43690, 0, 13120, 43690, 0, 13120, 43690, 0, 13120, 43690, 0, 13120, 43690, 0, 13120, 43690, 0, 13120, 43690, 0, 12864, 1, 0, 2304, 16384, 0, 2320, 16384, 0, 2336, 16384, 0, 3264, 320, 0, 3264, 320, 0, 3280, 320, 0, 3280, 320, 0, 3296, 320, 0, 3296, 320, 0, 5696, 43690, 0, 5696, 43690, 0, 5696, 43690, 0, 5696, 43690, 0, 5696, 43690, 0, 5696, 43690, 0, 5696, 43690, 0, 5696, 43690, 0, 5712, 43690, 0, 5712, 43690, 0, 5712, 43690, 0, 5712, 43690, 0, 5712, 43690, 0, 5712, 43690, 0, 5712, 43690, 0, 5712, 43690, 0, 6336, 7, 0, 6336, 7, 0, 6336, 7, 0, 6352, 7, 0, 6352, 7, 0, 6352, 7, 0, 9024, 1056, 0, 9024, 1056, 0, 9040, 1056, 0, 9040, 1056, 0, 10436, 16896, 0, 10436, 16896, 0, 10440, 16896, 0, 10440, 16896, 0, 10452, 16896, 0, 10452, 16896, 0, 10456, 16896, 0, 10456, 16896, 0, 11396, 1280, 0, 11396, 1280, 0, 11400, 1280, 0, 11400, 1280, 0, 11412, 1280, 0, 11412, 1280, 0, 11416, 1280, 0, 11416, 1280, 0, 11968, 43690, 0, 11968, 43690, 0, 11968, 43690, 0, 11968, 43690, 
0, 11968, 43690, 0, 11968, 43690, 0, 11968, 43690, 0, 11968, 43690, 0, 11984, 43690, 0, 11984, 43690, 0, 11984, 43690, 0, 11984, 43690, 0, 11984, 43690, 0, 11984, 43690, 0, 11984, 43690, 0, 11984, 43690, 0, 13120, 43690, 0, 13120, 43690, 0, 13120, 43690, 0, 13120, 43690, 0, 13120, 43690, 0, 13120, 43690, 0, 13120, 43690, 0, 13120, 43690, 0, 12864, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574754168210920_138_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574754168210920_138_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1e8b71ea --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574754168210920_138_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,326 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) 
| (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((274 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (324 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (333 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 32777, 0, 1280, 32777, 0, 1280, 32777, 0, 4992, 32769, 0, 4992, 32769, 0, 5952, 4096, 0, 8512, 5201, 0, 8512, 5201, 0, 8512, 5201, 0, 8512, 5201, 0, 8512, 5201, 0, 20736, 85, 0, 20736, 85, 0, 20736, 85, 0, 20736, 85, 0, 1280, 32777, 0, 1280, 32777, 0, 1280, 32777, 0, 4992, 32769, 0, 4992, 32769, 0, 5952, 4096, 0, 8512, 5201, 0, 8512, 5201, 0, 8512, 5201, 0, 8512, 5201, 0, 8512, 5201, 0, 20736, 85, 0, 20736, 85, 0, 20736, 85, 0, 20736, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574756199629930_139_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574756199629930_139_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5cc43d24 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574756199629930_139_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,268 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || 
(WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter2 == 1)) { + break; + } + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 
2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 11392, 16384, 0, 12560, 18436, 0, 12560, 18436, 0, 12560, 18436, 0, 12576, 18436, 0, 12576, 18436, 0, 12576, 18436, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 11392, 16384, 0, 12560, 18436, 0, 12560, 18436, 0, 12560, 18436, 0, 12576, 18436, 0, 12576, 
18436, 0, 12576, 18436, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574768786591438_143_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574768786591438_143_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f3e09f7a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574768786591438_143_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,174 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 12)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 
6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 1472, 8, 0, 3200, 2080, 0, 3200, 2080, 0, 4496, 21845, 0, 4496, 21845, 0, 4496, 21845, 0, 4496, 21845, 0, 4496, 21845, 0, 4496, 21845, 0, 4496, 21845, 0, 4496, 21845, 0, 5584, 85, 0, 5584, 85, 0, 5584, 85, 0, 5584, 85, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 1472, 8, 0, 3200, 2080, 0, 3200, 2080, 0, 4496, 21845, 0, 4496, 21845, 0, 4496, 21845, 0, 4496, 21845, 0, 4496, 21845, 0, 4496, 21845, 0, 4496, 21845, 0, 4496, 21845, 0, 5584, 85, 0, 5584, 85, 0, 5584, 85, 0, 5584, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 
3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574769184892168_144_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574769184892168_144_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5dae2c1d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574769184892168_144_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,218 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2448, 2, 0, 2464, 2, 0, 4416, 85, 0, 4416, 85, 0, 4416, 85, 0, 4416, 85, 0, 8448, 128, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2448, 2, 0, 2464, 2, 0, 4416, 85, 0, 4416, 85, 0, 4416, 85, 0, 4416, 85, 0, 8448, 128, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574769583307924_145_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574769583307924_145_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7463e89c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574769583307924_145_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,180 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) 
|| (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + continue; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((115 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if ((i2 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1488, 2, 0, 1504, 2, 0, 2448, 43008, 0, 2448, 43008, 0, 2448, 43008, 0, 2452, 43008, 0, 2452, 43008, 0, 2452, 43008, 0, 2456, 43008, 0, 2456, 43008, 0, 2456, 43008, 0, 2464, 43008, 0, 2464, 43008, 0, 2464, 43008, 0, 2468, 43008, 0, 2468, 43008, 0, 2468, 43008, 0, 2472, 43008, 0, 2472, 43008, 0, 2472, 43008, 0, 4112, 512, 0, 4116, 512, 0, 4120, 512, 0, 4128, 512, 0, 4132, 512, 0, 4136, 512, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1488, 2, 0, 1504, 2, 0, 2448, 43008, 0, 2448, 43008, 0, 2448, 43008, 0, 2452, 43008, 0, 2452, 43008, 0, 2452, 43008, 0, 2456, 43008, 0, 2456, 43008, 0, 2456, 43008, 0, 2464, 43008, 0, 2464, 43008, 0, 2464, 43008, 0, 2468, 43008, 0, 2468, 43008, 0, 2468, 43008, 0, 2472, 43008, 0, 2472, 43008, 0, 2472, 43008, 0, 4112, 512, 0, 4116, 512, 0, 4120, 512, 0, 4128, 512, 0, 4132, 512, 0, 4136, 512, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574771423393215_146_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574771423393215_146_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a7b954ed --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574771423393215_146_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,409 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 6))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((180 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: 
{ + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((189 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((194 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((201 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((205 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((224 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((235 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 3))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 14))) { + result = 
(result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (313 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (332 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (337 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (344 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (348 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 
7))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (363 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 312 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 6912, 2048, 0, 10432, 16388, 0, 10432, 16388, 0, 10448, 16388, 0, 10448, 16388, 0, 10464, 16388, 0, 10464, 16388, 0, 12420, 16452, 0, 12420, 16452, 0, 12420, 16452, 0, 12424, 16452, 0, 12424, 16452, 0, 12424, 16452, 0, 12428, 16452, 0, 12428, 16452, 0, 12428, 16452, 0, 12436, 16452, 0, 12436, 16452, 0, 12436, 16452, 0, 12440, 16452, 0, 12440, 16452, 0, 12440, 16452, 0, 12444, 16452, 0, 12444, 16452, 0, 12444, 16452, 0, 12452, 16452, 0, 12452, 16452, 0, 12452, 16452, 0, 12456, 16452, 0, 12456, 16452, 0, 12456, 16452, 0, 12460, 16452, 0, 12460, 16452, 0, 12460, 16452, 0, 15040, 16388, 0, 15040, 16388, 0, 15056, 16388, 0, 15056, 16388, 0, 15072, 16388, 0, 15072, 16388, 0, 16320, 1, 0, 17856, 1, 0, 21568, 32, 0, 22016, 8, 0, 23232, 2176, 0, 23232, 2176, 0, 576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 6912, 2048, 0, 10432, 16388, 0, 10432, 16388, 0, 10448, 16388, 0, 10448, 16388, 0, 10464, 16388, 0, 10464, 16388, 0, 12420, 16452, 0, 12420, 16452, 0, 12420, 16452, 0, 12424, 16452, 0, 12424, 16452, 0, 12424, 16452, 0, 12428, 16452, 0, 12428, 16452, 0, 12428, 16452, 0, 12436, 16452, 0, 12436, 16452, 0, 12436, 16452, 0, 12440, 16452, 0, 12440, 16452, 0, 12440, 16452, 0, 12444, 16452, 0, 12444, 16452, 0, 12444, 16452, 0, 12452, 16452, 0, 12452, 16452, 0, 12452, 16452, 0, 12456, 
16452, 0, 12456, 16452, 0, 12456, 16452, 0, 12460, 16452, 0, 12460, 16452, 0, 12460, 16452, 0, 15040, 16388, 0, 15040, 16388, 0, 15056, 16388, 0, 15056, 16388, 0, 15072, 16388, 0, 15072, 16388, 0, 16320, 1, 0, 17856, 1, 0, 21568, 32, 0, 22016, 8, 0, 23232, 2176, 0, 23232, 2176, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574776437568621_147_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574776437568621_147_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5f38256e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574776437568621_147_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,191 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } 
+ } + if ((counter0 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 40, 0, 1856, 40, 0, 4624, 8, 0, 4628, 8, 0, 4632, 8, 0, 5520, 16416, 0, 5520, 16416, 0, 5524, 16416, 0, 5524, 16416, 0, 5528, 16416, 0, 5528, 16416, 0, 6912, 16384, 0, 9792, 2, 0, 9536, 512, 0, 9152, 21845, 0, 9152, 21845, 0, 9152, 21845, 0, 9152, 21845, 0, 9152, 21845, 0, 9152, 21845, 0, 9152, 21845, 0, 9152, 21845, 0, 8384, 40960, 0, 8384, 40960, 0, 1856, 40, 0, 1856, 40, 0, 4624, 8, 0, 4628, 8, 0, 4632, 8, 0, 5520, 16416, 0, 5520, 16416, 0, 5524, 16416, 0, 5524, 16416, 0, 5528, 16416, 0, 5528, 16416, 0, 6912, 16384, 0, 9792, 2, 0, 9536, 512, 0, 9152, 21845, 0, 9152, 21845, 0, 9152, 21845, 0, 9152, 21845, 0, 9152, 21845, 0, 9152, 21845, 0, 9152, 21845, 0, 9152, 21845, 0, 8384, 40960, 0, 8384, 40960, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574790268010281_149_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574790268010281_149_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0cb62c93 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574790268010281_149_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,132 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1680, 16, 0, 1696, 16, 0, 1712, 16, 0, 3200, 17476, 0, 3200, 17476, 0, 3200, 17476, 0, 3200, 17476, 0, 3648, 34952, 0, 3648, 34952, 0, 3648, 34952, 0, 3648, 34952, 0, 1680, 16, 0, 1696, 16, 0, 1712, 16, 0, 3200, 17476, 0, 3200, 17476, 0, 3200, 17476, 0, 3200, 17476, 0, 3648, 34952, 0, 3648, 34952, 0, 3648, 34952, 0, 3648, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574804987985781_151_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574804987985781_151_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b7a48e7f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574804987985781_151_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,149 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 
1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + 
Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 4161, 0, 1600, 4161, 0, 1600, 4161, 0, 2176, 5201, 0, 2176, 5201, 0, 2176, 5201, 0, 2176, 5201, 0, 2176, 5201, 0, 5888, 16644, 0, 5888, 16644, 0, 5888, 16644, 0, 5904, 16644, 0, 5904, 16644, 0, 5904, 16644, 0, 6464, 16644, 0, 6464, 16644, 0, 6464, 16644, 0, 6480, 16644, 0, 6480, 16644, 0, 6480, 16644, 0, 6912, 4, 0, 6928, 4, 0, 1600, 4161, 0, 1600, 4161, 0, 1600, 4161, 0, 2176, 5201, 0, 2176, 5201, 0, 2176, 5201, 0, 2176, 5201, 0, 2176, 5201, 0, 5888, 16644, 0, 5888, 16644, 0, 5888, 16644, 0, 5904, 16644, 0, 5904, 16644, 0, 5904, 16644, 0, 6464, 16644, 0, 6464, 16644, 0, 6464, 16644, 0, 6480, 16644, 0, 6480, 16644, 0, 6480, 16644, 0, 6912, 4, 0, 6928, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574805358893554_152_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574805358893554_152_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c0302bb4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574805358893554_152_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,284 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((111 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 2768, 16, 0, 2784, 16, 0, 3264, 256, 0, 4160, 17476, 0, 4160, 17476, 0, 4160, 17476, 0, 4160, 17476, 0, 4608, 34952, 0, 4608, 34952, 0, 4608, 34952, 0, 4608, 34952, 0, 6016, 20485, 0, 6016, 20485, 0, 6016, 20485, 0, 6016, 20485, 0, 6032, 20485, 0, 6032, 20485, 0, 6032, 20485, 0, 6032, 20485, 0, 8320, 16385, 0, 8320, 16385, 0, 8336, 16385, 0, 8336, 16385, 0, 11328, 32, 0, 768, 1, 0, 
2768, 16, 0, 2784, 16, 0, 3264, 256, 0, 4160, 17476, 0, 4160, 17476, 0, 4160, 17476, 0, 4160, 17476, 0, 4608, 34952, 0, 4608, 34952, 0, 4608, 34952, 0, 4608, 34952, 0, 6016, 20485, 0, 6016, 20485, 0, 6016, 20485, 0, 6016, 20485, 0, 6032, 20485, 0, 6032, 20485, 0, 6032, 20485, 0, 6032, 20485, 0, 8320, 16385, 0, 8320, 16385, 0, 8336, 16385, 0, 8336, 16385, 0, 11328, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574808681554188_153_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574808681554188_153_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bcda2316 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574808681554188_153_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,236 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((137 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((147 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((154 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((169 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 264 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1536, 1, 0, 5392, 256, 0, 7056, 64, 0, 7072, 64, 0, 7088, 64, 0, 8784, 8260, 0, 8784, 8260, 0, 8784, 8260, 0, 8788, 8260, 0, 8788, 8260, 0, 8788, 8260, 0, 8800, 8260, 0, 8800, 8260, 0, 8800, 8260, 0, 8804, 8260, 0, 8804, 8260, 0, 8804, 8260, 0, 8816, 8260, 0, 8816, 8260, 0, 8816, 8260, 0, 8820, 8260, 0, 8820, 8260, 0, 8820, 8260, 0, 
10832, 4225, 0, 10832, 4225, 0, 10832, 4225, 0, 10836, 4225, 0, 10836, 4225, 0, 10836, 4225, 0, 10848, 4225, 0, 10848, 4225, 0, 10848, 4225, 0, 10852, 4225, 0, 10852, 4225, 0, 10852, 4225, 0, 10864, 4225, 0, 10864, 4225, 0, 10864, 4225, 0, 10868, 4225, 0, 10868, 4225, 0, 10868, 4225, 0, 11472, 1, 0, 11488, 1, 0, 11504, 1, 0, 1536, 1, 0, 5392, 256, 0, 7056, 64, 0, 7072, 64, 0, 7088, 64, 0, 8784, 8260, 0, 8784, 8260, 0, 8784, 8260, 0, 8788, 8260, 0, 8788, 8260, 0, 8788, 8260, 0, 8800, 8260, 0, 8800, 8260, 0, 8800, 8260, 0, 8804, 8260, 0, 8804, 8260, 0, 8804, 8260, 0, 8816, 8260, 0, 8816, 8260, 0, 8816, 8260, 0, 8820, 8260, 0, 8820, 8260, 0, 8820, 8260, 0, 10832, 4225, 0, 10832, 4225, 0, 10832, 4225, 0, 10836, 4225, 0, 10836, 4225, 0, 10836, 4225, 0, 10848, 4225, 0, 10848, 4225, 0, 10848, 4225, 0, 10852, 4225, 0, 10852, 4225, 0, 10852, 4225, 0, 10864, 4225, 0, 10864, 4225, 0, 10864, 4225, 0, 10868, 4225, 0, 10868, 4225, 0, 10868, 4225, 0, 11472, 1, 0, 11488, 1, 0, 11504, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574820959927707_155_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574820959927707_155_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..88545fb5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574820959927707_155_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 
60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574821075760031_156_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574821075760031_156_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c930d99d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574821075760031_156_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,225 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((67 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((78 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3072, 2048, 0, 3088, 2048, 0, 4992, 16, 0, 4996, 16, 0, 5000, 16, 0, 5008, 16, 0, 5012, 16, 0, 5016, 16, 0, 6080, 2048, 0, 6096, 2048, 0, 6912, 17, 0, 6912, 17, 0, 7808, 17476, 0, 7808, 17476, 0, 7808, 17476, 0, 7808, 17476, 0, 8256, 34952, 0, 8256, 34952, 0, 8256, 34952, 0, 8256, 34952, 0, 9152, 73, 0, 9152, 73, 0, 9152, 73, 0, 9728, 1040, 0, 9728, 1040, 0, 10048, 18724, 0, 10048, 18724, 0, 10048, 18724, 0, 10048, 18724, 0, 10048, 18724, 0, 3072, 2048, 0, 3088, 2048, 0, 4992, 16, 0, 4996, 16, 0, 5000, 16, 0, 5008, 16, 0, 5012, 16, 0, 5016, 16, 0, 6080, 2048, 0, 6096, 2048, 0, 6912, 17, 0, 6912, 17, 0, 7808, 17476, 0, 7808, 17476, 0, 7808, 17476, 0, 7808, 17476, 0, 8256, 34952, 0, 8256, 34952, 0, 8256, 34952, 0, 8256, 34952, 0, 9152, 73, 0, 9152, 73, 0, 9152, 73, 0, 9728, 1040, 0, 9728, 1040, 0, 10048, 18724, 0, 10048, 18724, 0, 10048, 18724, 0, 10048, 18724, 0, 
10048, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574823713681892_157_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574823713681892_157_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4e2fc11c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574823713681892_157_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if 
(true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574824056238747_159_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574824056238747_159_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5eb5efee --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574824056238747_159_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,418 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((228 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((289 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (329 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (350 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((366 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (371 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: 
Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 5056, 24576, 0, 5056, 24576, 0, 6016, 24576, 0, 6016, 24576, 0, 6032, 24576, 0, 6032, 24576, 0, 6048, 24576, 0, 6048, 24576, 0, 6720, 16384, 0, 7424, 6, 0, 7424, 6, 0, 8576, 57344, 0, 8576, 57344, 0, 8576, 57344, 0, 9792, 16384, 0, 10880, 1, 0, 12688, 16, 0, 12704, 16, 0, 12720, 16, 0, 16192, 17476, 0, 16192, 17476, 0, 16192, 17476, 0, 16192, 17476, 0, 23744, 4, 0, 576, 17, 0, 576, 17, 0, 5056, 24576, 0, 5056, 24576, 0, 6016, 24576, 0, 6016, 24576, 0, 6032, 24576, 0, 6032, 24576, 0, 6048, 24576, 0, 6048, 24576, 0, 6720, 16384, 0, 7424, 6, 0, 7424, 6, 0, 8576, 57344, 0, 8576, 57344, 0, 8576, 57344, 0, 9792, 16384, 0, 10880, 1, 0, 12688, 16, 0, 12704, 16, 0, 12720, 16, 0, 16192, 17476, 0, 16192, 17476, 0, 16192, 17476, 0, 16192, 17476, 0, 23744, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574825527120480_160_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574825527120480_160_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6d5b308c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574825527120480_160_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,413 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if 
(((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 9)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((175 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((231 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((265 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((313 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((335 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((349 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((365 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((376 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((395 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((418 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 330 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1344, 1040, 0, 1344, 1040, 0, 2496, 16384, 0, 5760, 32768, 0, 6400, 8, 0, 7296, 2048, 0, 8256, 40968, 0, 8256, 40968, 0, 8256, 40968, 0, 16960, 8322, 0, 16960, 8322, 0, 16960, 8322, 0, 16976, 8322, 0, 16976, 8322, 0, 16976, 8322, 0, 17920, 18724, 0, 17920, 18724, 0, 17920, 18724, 0, 17920, 18724, 0, 17920, 18724, 0, 20032, 17921, 0, 20032, 17921, 0, 20032, 17921, 0, 20032, 17921, 0, 20048, 17921, 0, 20048, 17921, 0, 20048, 17921, 0, 20048, 17921, 0, 20064, 17921, 0, 20064, 17921, 0, 20064, 17921, 0, 20064, 17921, 0, 21440, 16384, 0, 21456, 16384, 0, 21472, 16384, 0, 25280, 16, 0, 25296, 16, 0, 25312, 16, 0, 26752, 33161, 0, 26752, 33161, 0, 26752, 33161, 0, 26752, 33161, 0, 26752, 33161, 0, 26768, 33161, 0, 26768, 33161, 0, 26768, 33161, 0, 26768, 33161, 0, 26768, 33161, 0, 26784, 33161, 0, 26784, 33161, 0, 26784, 33161, 0, 26784, 33161, 0, 26784, 33161, 0, 768, 65, 0, 768, 65, 0, 1344, 1040, 0, 1344, 1040, 0, 2496, 16384, 0, 5760, 32768, 0, 6400, 8, 0, 7296, 2048, 0, 8256, 40968, 0, 8256, 40968, 0, 8256, 40968, 0, 16960, 8322, 0, 16960, 8322, 0, 16960, 8322, 0, 16976, 8322, 0, 16976, 8322, 0, 16976, 8322, 0, 17920, 18724, 0, 17920, 18724, 0, 17920, 18724, 0, 17920, 18724, 0, 17920, 18724, 0, 20032, 17921, 0, 20032, 17921, 0, 20032, 17921, 0, 20032, 17921, 0, 20048, 17921, 0, 20048, 17921, 0, 20048, 17921, 0, 20048, 17921, 0, 20064, 17921, 0, 20064, 17921, 0, 20064, 17921, 0, 20064, 17921, 0, 21440, 16384, 0, 21456, 16384, 0, 21472, 16384, 0, 
25280, 16, 0, 25296, 16, 0, 25312, 16, 0, 26752, 33161, 0, 26752, 33161, 0, 26752, 33161, 0, 26752, 33161, 0, 26752, 33161, 0, 26768, 33161, 0, 26768, 33161, 0, 26768, 33161, 0, 26768, 33161, 0, 26768, 33161, 0, 26784, 33161, 0, 26784, 33161, 0, 26784, 33161, 0, 26784, 33161, 0, 26784, 33161, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574833444926974_161_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574833444926974_161_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..20ede919 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574833444926974_161_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,177 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((68 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((75 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 420 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 32768, 0, 1616, 32768, 0, 1632, 32768, 0, 2816, 32769, 0, 2816, 32769, 0, 2820, 32769, 0, 2820, 32769, 0, 2824, 32769, 0, 2824, 32769, 0, 2832, 32769, 0, 2832, 32769, 0, 2836, 32769, 0, 2836, 32769, 0, 2840, 32769, 0, 2840, 32769, 0, 2848, 32769, 0, 2848, 32769, 0, 2852, 32769, 0, 2852, 32769, 0, 2856, 32769, 0, 2856, 32769, 0, 3456, 1, 0, 3460, 1, 0, 3464, 1, 0, 3472, 1, 0, 
3476, 1, 0, 3480, 1, 0, 3488, 1, 0, 3492, 1, 0, 3496, 1, 0, 4352, 64, 0, 4356, 64, 0, 4360, 64, 0, 4368, 64, 0, 4372, 64, 0, 4376, 64, 0, 4384, 64, 0, 4388, 64, 0, 4392, 64, 0, 4800, 32776, 0, 4800, 32776, 0, 4804, 32776, 0, 4804, 32776, 0, 4808, 32776, 0, 4808, 32776, 0, 4816, 32776, 0, 4816, 32776, 0, 4820, 32776, 0, 4820, 32776, 0, 4824, 32776, 0, 4824, 32776, 0, 4832, 32776, 0, 4832, 32776, 0, 4836, 32776, 0, 4836, 32776, 0, 4840, 32776, 0, 4840, 32776, 0, 6144, 513, 0, 6144, 513, 0, 6160, 513, 0, 6160, 513, 0, 6176, 513, 0, 6176, 513, 0, 6720, 1040, 0, 6720, 1040, 0, 7040, 18724, 0, 7040, 18724, 0, 7040, 18724, 0, 7040, 18724, 0, 7040, 18724, 0, 1600, 32768, 0, 1616, 32768, 0, 1632, 32768, 0, 2816, 32769, 0, 2816, 32769, 0, 2820, 32769, 0, 2820, 32769, 0, 2824, 32769, 0, 2824, 32769, 0, 2832, 32769, 0, 2832, 32769, 0, 2836, 32769, 0, 2836, 32769, 0, 2840, 32769, 0, 2840, 32769, 0, 2848, 32769, 0, 2848, 32769, 0, 2852, 32769, 0, 2852, 32769, 0, 2856, 32769, 0, 2856, 32769, 0, 3456, 1, 0, 3460, 1, 0, 3464, 1, 0, 3472, 1, 0, 3476, 1, 0, 3480, 1, 0, 3488, 1, 0, 3492, 1, 0, 3496, 1, 0, 4352, 64, 0, 4356, 64, 0, 4360, 64, 0, 4368, 64, 0, 4372, 64, 0, 4376, 64, 0, 4384, 64, 0, 4388, 64, 0, 4392, 64, 0, 4800, 32776, 0, 4800, 32776, 0, 4804, 32776, 0, 4804, 32776, 0, 4808, 32776, 0, 4808, 32776, 0, 4816, 32776, 0, 4816, 32776, 0, 4820, 32776, 0, 4820, 32776, 0, 4824, 32776, 0, 4824, 32776, 0, 4832, 32776, 0, 4832, 32776, 0, 4836, 32776, 0, 4836, 32776, 0, 4840, 32776, 0, 4840, 32776, 0, 6144, 513, 0, 6144, 513, 0, 6160, 513, 0, 6160, 513, 0, 6176, 513, 0, 6176, 513, 0, 6720, 1040, 0, 6720, 1040, 0, 7040, 18724, 0, 7040, 18724, 0, 7040, 18724, 0, 7040, 18724, 0, 7040, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574843286230405_162_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574843286230405_162_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..91d19e04 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574843286230405_162_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,145 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 4))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + 
Stride: 4 + Data: [2304, 16, 0, 4928, 1040, 0, 4928, 1040, 0, 6080, 21845, 0, 6080, 21845, 0, 6080, 21845, 0, 6080, 21845, 0, 6080, 21845, 0, 6080, 21845, 0, 6080, 21845, 0, 6080, 21845, 0, 2304, 16, 0, 4928, 1040, 0, 4928, 1040, 0, 6080, 21845, 0, 6080, 21845, 0, 6080, 21845, 0, 6080, 21845, 0, 6080, 21845, 0, 6080, 21845, 0, 6080, 21845, 0, 6080, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574843464116248_163_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574843464116248_163_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..225007e6 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574843464116248_163_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,452 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | 
(counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if 
(((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 7)) { + if 
((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 6)) { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 
13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 2)) { + break; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((283 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((292 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { 
+ result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 15))) { + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((337 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 2)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (363 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 9))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (397 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + for (uint i8 = 0; (i8 < 2); i8 = (i8 + 1)) 
{ + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((414 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((426 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((433 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((440 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((449 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (453 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 360 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1232, 73, 0, 1232, 73, 0, 1232, 73, 0, 1248, 73, 0, 1248, 73, 0, 1248, 73, 0, 3840, 2, 0, 3844, 2, 0, 3848, 2, 0, 3856, 2, 0, 3860, 2, 0, 3864, 2, 0, 4608, 1040, 0, 4608, 1040, 0, 4624, 1040, 0, 4624, 1040, 0, 4928, 18724, 0, 4928, 18724, 0, 4928, 18724, 0, 4928, 18724, 0, 4928, 18724, 0, 6080, 61447, 0, 6080, 61447, 0, 6080, 61447, 0, 6080, 61447, 0, 6080, 61447, 0, 6080, 61447, 0, 6080, 61447, 0, 7232, 49155, 0, 7232, 49155, 0, 7232, 49155, 0, 7232, 49155, 0, 7872, 1, 0, 9040, 1, 0, 9056, 1, 0, 9072, 1, 0, 9344, 16384, 0, 11264, 12288, 0, 11264, 12288, 0, 11264, 49153, 0, 11264, 49153, 0, 11264, 49153, 0, 14976, 272, 0, 14976, 272, 0, 16144, 1, 0, 16160, 1, 0, 19008, 32, 0, 21568, 4, 0, 21584, 4, 0, 21600, 4, 0, 26496, 34952, 0, 26496, 34952, 0, 26496, 34952, 0, 26496, 34952, 0, 26512, 34952, 0, 26512, 34952, 0, 26512, 34952, 0, 26512, 34952, 0, 28160, 2048, 0, 28176, 2048, 0, 1232, 73, 0, 1232, 73, 0, 1232, 73, 0, 1248, 73, 0, 1248, 73, 0, 1248, 73, 0, 3840, 2, 0, 3844, 2, 0, 3848, 2, 0, 3856, 2, 0, 3860, 2, 0, 3864, 2, 0, 4608, 1040, 0, 4608, 1040, 0, 4624, 1040, 0, 4624, 1040, 0, 4928, 18724, 0, 4928, 18724, 0, 4928, 18724, 0, 4928, 18724, 0, 4928, 18724, 0, 6080, 61447, 0, 6080, 61447, 0, 6080, 61447, 0, 6080, 61447, 0, 6080, 61447, 0, 6080, 61447, 0, 6080, 61447, 0, 7232, 49155, 0, 7232, 49155, 0, 7232, 49155, 0, 7232, 49155, 0, 7872, 1, 0, 9040, 1, 0, 9056, 1, 0, 9072, 1, 0, 9344, 16384, 0, 11264, 12288, 0, 11264, 12288, 0, 11264, 49153, 0, 11264, 49153, 0, 11264, 49153, 0, 14976, 272, 0, 14976, 272, 0, 16144, 1, 0, 16160, 1, 0, 19008, 32, 0, 21568, 4, 0, 21584, 4, 0, 21600, 4, 0, 26496, 34952, 0, 26496, 34952, 0, 26496, 34952, 0, 26496, 
34952, 0, 26512, 34952, 0, 26512, 34952, 0, 26512, 34952, 0, 26512, 34952, 0, 28160, 2048, 0, 28176, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574853854970141_164_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574853854970141_164_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..696edcf1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574853854970141_164_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,169 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 4))) { + result = (result + 
WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || 
(WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 294 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2560, 16, 0, 2576, 16, 0, 3648, 8322, 0, 3648, 8322, 0, 3648, 8322, 0, 3652, 8322, 0, 3652, 8322, 0, 3652, 8322, 0, 3656, 8322, 0, 3656, 8322, 0, 3656, 8322, 0, 
3664, 8322, 0, 3664, 8322, 0, 3664, 8322, 0, 3668, 8322, 0, 3668, 8322, 0, 3668, 8322, 0, 3672, 8322, 0, 3672, 8322, 0, 3672, 8322, 0, 4224, 8322, 0, 4224, 8322, 0, 4224, 8322, 0, 4228, 8322, 0, 4228, 8322, 0, 4228, 8322, 0, 4232, 8322, 0, 4232, 8322, 0, 4232, 8322, 0, 4240, 8322, 0, 4240, 8322, 0, 4240, 8322, 0, 4244, 8322, 0, 4244, 8322, 0, 4244, 8322, 0, 4248, 8322, 0, 4248, 8322, 0, 4248, 8322, 0, 5696, 16, 0, 5712, 16, 0, 8272, 16384, 0, 8288, 16384, 0, 8976, 16384, 0, 8992, 16384, 0, 10192, 4, 0, 10208, 4, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 2560, 16, 0, 2576, 16, 0, 3648, 8322, 0, 3648, 8322, 0, 3648, 8322, 0, 3652, 8322, 0, 3652, 8322, 0, 3652, 8322, 0, 3656, 8322, 0, 3656, 8322, 0, 3656, 8322, 0, 3664, 8322, 0, 3664, 8322, 0, 3664, 8322, 0, 3668, 8322, 0, 3668, 8322, 0, 3668, 8322, 0, 3672, 8322, 0, 3672, 8322, 0, 3672, 8322, 0, 4224, 8322, 0, 4224, 8322, 0, 4224, 8322, 0, 4228, 8322, 0, 4228, 8322, 0, 4228, 8322, 0, 4232, 8322, 0, 4232, 8322, 0, 4232, 8322, 0, 4240, 8322, 0, 4240, 8322, 0, 4240, 8322, 0, 4244, 8322, 0, 4244, 8322, 0, 4244, 8322, 0, 4248, 8322, 0, 4248, 8322, 0, 4248, 8322, 0, 5696, 16, 0, 5712, 16, 0, 8272, 16384, 0, 8288, 16384, 0, 8976, 16384, 0, 8992, 16384, 0, 10192, 4, 0, 10208, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574855940290546_165_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574855940290546_165_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2af824d1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574855940290546_165_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,118 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 1))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (((68 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574863770464167_167_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574863770464167_167_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d372a225 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574863770464167_167_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,229 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + 
Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1344, 1040, 0, 1344, 1040, 0, 1664, 16644, 0, 1664, 16644, 0, 1664, 16644, 0, 4352, 1025, 0, 4352, 1025, 0, 7552, 4, 0, 8896, 4, 0, 768, 65, 0, 768, 65, 0, 1344, 1040, 0, 1344, 1040, 0, 1664, 16644, 0, 1664, 16644, 0, 1664, 16644, 0, 4352, 1025, 0, 4352, 1025, 0, 7552, 4, 0, 8896, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574863998828921_168_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574863998828921_168_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e255eb62 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574863998828921_168_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,282 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (i0 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 204 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1664, 8, 0, 3520, 40970, 0, 3520, 40970, 0, 3520, 40970, 0, 3520, 40970, 0, 6016, 1, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5376, 1024, 0, 4864, 20480, 0, 4864, 20480, 0, 6656, 17, 0, 6656, 17, 0, 7232, 4369, 0, 7232, 4369, 0, 7232, 4369, 0, 7232, 4369, 0, 7552, 17476, 0, 7552, 17476, 0, 7552, 17476, 0, 7552, 17476, 0, 8704, 34816, 0, 8704, 34816, 0, 14080, 32768, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1664, 8, 0, 3520, 40970, 0, 3520, 40970, 0, 3520, 40970, 0, 3520, 40970, 0, 6016, 1, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5376, 1024, 0, 4864, 20480, 0, 4864, 20480, 0, 6656, 17, 0, 6656, 17, 0, 7232, 4369, 0, 7232, 4369, 0, 7232, 
4369, 0, 7232, 4369, 0, 7552, 17476, 0, 7552, 17476, 0, 7552, 17476, 0, 7552, 17476, 0, 8704, 34816, 0, 8704, 34816, 0, 14080, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574864610034830_169_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574864610034830_169_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..59a42e3d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574864610034830_169_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,237 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 4096, 16384, 0, 4112, 16384, 0, 4800, 16384, 0, 5824, 260, 0, 5824, 260, 0, 6464, 4, 0, 8704, 2048, 0, 8704, 4, 0, 13632, 43690, 0, 13632, 43690, 0, 13632, 43690, 0, 13632, 43690, 0, 13632, 43690, 0, 13632, 43690, 0, 13632, 43690, 0, 13632, 43690, 0, 576, 73, 0, 576, 
73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 4096, 16384, 0, 4112, 16384, 0, 4800, 16384, 0, 5824, 260, 0, 5824, 260, 0, 6464, 4, 0, 8704, 2048, 0, 8704, 4, 0, 13632, 43690, 0, 13632, 43690, 0, 13632, 43690, 0, 13632, 43690, 0, 13632, 43690, 0, 13632, 43690, 0, 13632, 43690, 0, 13632, 43690, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574864953911692_170_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574864953911692_170_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..df4a9316 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574864953911692_170_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,313 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 1))) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (i1 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 2)) { + break; + } + } + } else { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((251 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 312 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 256, 0, 
6144, 2, 0, 6912, 32, 0, 8384, 288, 0, 8384, 288, 0, 9280, 128, 0, 14736, 17429, 0, 14736, 17429, 0, 14736, 17429, 0, 14736, 17429, 0, 14736, 17429, 0, 14752, 17429, 0, 14752, 17429, 0, 14752, 17429, 0, 14752, 17429, 0, 14752, 17429, 0, 14768, 17429, 0, 14768, 17429, 0, 14768, 17429, 0, 14768, 17429, 0, 14768, 17429, 0, 15376, 21, 0, 15376, 21, 0, 15376, 21, 0, 15392, 21, 0, 15392, 21, 0, 15392, 21, 0, 15408, 21, 0, 15408, 21, 0, 15408, 21, 0, 16080, 16405, 0, 16080, 16405, 0, 16080, 16405, 0, 16080, 16405, 0, 16096, 16405, 0, 16096, 16405, 0, 16096, 16405, 0, 16096, 16405, 0, 16112, 16405, 0, 16112, 16405, 0, 16112, 16405, 0, 16112, 16405, 0, 16704, 73, 0, 16704, 73, 0, 16704, 73, 0, 17280, 1040, 0, 17280, 1040, 0, 17600, 18724, 0, 17600, 18724, 0, 17600, 18724, 0, 17600, 18724, 0, 17600, 18724, 0, 1856, 256, 0, 6144, 2, 0, 6912, 32, 0, 8384, 288, 0, 8384, 288, 0, 9280, 128, 0, 14736, 17429, 0, 14736, 17429, 0, 14736, 17429, 0, 14736, 17429, 0, 14736, 17429, 0, 14752, 17429, 0, 14752, 17429, 0, 14752, 17429, 0, 14752, 17429, 0, 14752, 17429, 0, 14768, 17429, 0, 14768, 17429, 0, 14768, 17429, 0, 14768, 17429, 0, 14768, 17429, 0, 15376, 21, 0, 15376, 21, 0, 15376, 21, 0, 15392, 21, 0, 15392, 21, 0, 15392, 21, 0, 15408, 21, 0, 15408, 21, 0, 15408, 21, 0, 16080, 16405, 0, 16080, 16405, 0, 16080, 16405, 0, 16080, 16405, 0, 16096, 16405, 0, 16096, 16405, 0, 16096, 16405, 0, 16096, 16405, 0, 16112, 16405, 0, 16112, 16405, 0, 16112, 16405, 0, 16112, 16405, 0, 16704, 73, 0, 16704, 73, 0, 16704, 73, 0, 17280, 1040, 0, 17280, 1040, 0, 17600, 18724, 0, 17600, 18724, 0, 17600, 18724, 0, 17600, 18724, 0, 17600, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574869324875531_171_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574869324875531_171_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..198fa7ed --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574869324875531_171_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,307 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || 
(WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || 
(WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + } else { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((268 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((292 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((299 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4096, 32768, 0, 10944, 1, 0, 10960, 1, 0, 10976, 1, 0, 12096, 1, 0, 12112, 1, 0, 12128, 1, 0, 12800, 1, 0, 12816, 1, 0, 12832, 1, 0, 16016, 34, 0, 16016, 34, 0, 16032, 34, 0, 16032, 34, 0, 17152, 4096, 0, 17168, 4096, 0, 17184, 4096, 0, 19776, 34952, 0, 19776, 34952, 0, 19776, 34952, 0, 19776, 34952, 0, 4096, 32768, 0, 10944, 1, 0, 10960, 1, 0, 10976, 1, 0, 12096, 1, 0, 12112, 1, 0, 12128, 1, 0, 12800, 1, 0, 12816, 1, 0, 12832, 1, 0, 16016, 
34, 0, 16016, 34, 0, 16032, 34, 0, 16032, 34, 0, 17152, 4096, 0, 17168, 4096, 0, 17184, 4096, 0, 19776, 34952, 0, 19776, 34952, 0, 19776, 34952, 0, 19776, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574880091940040_173_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574880091940040_173_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c1e5f1e5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574880091940040_173_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,330 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 
9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((77 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((84 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((186 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((243 << 6) | (i6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((250 << 6) | (i6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 2)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() 
== 4))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (333 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (342 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 2512, 2568, 0, 2512, 2568, 0, 2512, 2568, 0, 2528, 2568, 0, 2528, 2568, 0, 2528, 2568, 0, 2944, 21504, 0, 2944, 21504, 0, 2944, 21504, 0, 5380, 4, 0, 5384, 4, 0, 5388, 4, 0, 5396, 4, 0, 5400, 4, 0, 5404, 4, 0, 5412, 4, 0, 5416, 4, 0, 5420, 4, 0, 6272, 1, 0, 6912, 85, 0, 6912, 85, 0, 6912, 85, 0, 6912, 85, 0, 8384, 73, 0, 8384, 73, 0, 8384, 73, 0, 21888, 2080, 0, 21888, 2080, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 2512, 2568, 0, 2512, 2568, 0, 2512, 2568, 0, 2528, 2568, 0, 2528, 2568, 0, 2528, 2568, 0, 2944, 21504, 0, 2944, 21504, 0, 2944, 21504, 0, 5380, 4, 0, 5384, 4, 0, 5388, 4, 0, 5396, 4, 0, 5400, 4, 0, 5404, 4, 0, 5412, 4, 0, 5416, 4, 0, 5420, 4, 0, 6272, 1, 0, 6912, 85, 0, 6912, 85, 0, 6912, 85, 0, 6912, 85, 0, 8384, 73, 0, 8384, 73, 0, 8384, 73, 0, 21888, 2080, 0, 21888, 2080, 0] + - Name: _wave_op_index + Format: UInt32 + 
Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574901801409287_174_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574901801409287_174_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a73e1a48 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574901801409287_174_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,308 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 26214, 0, 2112, 26214, 0, 2112, 26214, 0, 2112, 26214, 0, 2112, 26214, 0, 2112, 26214, 0, 2112, 26214, 0, 2112, 26214, 0, 2752, 8, 0, 3648, 2048, 0, 4544, 17, 0, 4544, 17, 0, 5440, 17476, 0, 5440, 17476, 0, 5440, 17476, 0, 5440, 17476, 0, 6080, 8, 0, 9280, 2048, 0, 10176, 73, 0, 10176, 73, 0, 10176, 73, 0, 10752, 1040, 0, 10752, 1040, 0, 12800, 16384, 0, 13952, 16384, 0, 2112, 26214, 0, 2112, 26214, 0, 2112, 26214, 0, 2112, 26214, 0, 2112, 26214, 0, 2112, 26214, 0, 2112, 26214, 0, 2112, 26214, 0, 2752, 8, 0, 3648, 2048, 0, 4544, 17, 0, 4544, 17, 0, 5440, 17476, 0, 5440, 17476, 0, 5440, 17476, 0, 5440, 17476, 0, 6080, 8, 0, 9280, 2048, 0, 10176, 73, 0, 10176, 73, 0, 10176, 73, 0, 10752, 1040, 0, 10752, 1040, 0, 12800, 16384, 0, 13952, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574903431951890_175_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574903431951890_175_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a12215aa --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574903431951890_175_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,272 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 11)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6144, 17, 0, 6144, 17, 0, 12032, 64, 0, 16256, 34952, 0, 16256, 34952, 0, 16256, 34952, 0, 16256, 34952, 0, 6144, 17, 0, 6144, 17, 0, 12032, 64, 0, 16256, 34952, 0, 16256, 34952, 0, 16256, 34952, 0, 16256, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574903653590703_176_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574903653590703_176_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b11c4a2a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574903653590703_176_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,197 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 6032, 2, 0, 6048, 2, 0, 6976, 256, 0, 6992, 256, 0, 7008, 256, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 6032, 2, 0, 6048, 2, 0, 6976, 256, 0, 6992, 256, 0, 7008, 256, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574904001524805_177_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574904001524805_177_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5d5ba0f4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574904001524805_177_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,307 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 
< 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 12)) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((249 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((264 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 2)) { + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + 
Fill: 0 + Size: 636 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 1792, 20480, 0, 1792, 20480, 0, 1808, 20480, 0, 1808, 20480, 0, 1824, 20480, 0, 1824, 20480, 0, 3396, 21845, 0, 3396, 21845, 0, 3396, 21845, 0, 3396, 21845, 0, 3396, 21845, 0, 3396, 21845, 0, 3396, 21845, 0, 3396, 21845, 0, 3400, 21845, 0, 3400, 21845, 0, 3400, 21845, 0, 3400, 21845, 0, 3400, 21845, 0, 3400, 21845, 0, 3400, 21845, 0, 3400, 21845, 0, 3412, 21845, 0, 3412, 21845, 0, 3412, 21845, 0, 3412, 21845, 0, 3412, 21845, 0, 3412, 21845, 0, 3412, 21845, 0, 3412, 21845, 0, 3416, 21845, 0, 3416, 21845, 0, 3416, 21845, 0, 3416, 21845, 0, 3416, 21845, 0, 3416, 21845, 0, 3416, 21845, 0, 3416, 21845, 0, 3428, 21845, 0, 3428, 21845, 0, 3428, 21845, 0, 3428, 21845, 0, 3428, 21845, 0, 3428, 21845, 0, 3428, 21845, 0, 3428, 21845, 0, 3432, 21845, 0, 3432, 21845, 0, 3432, 21845, 0, 3432, 21845, 0, 3432, 21845, 0, 3432, 21845, 0, 3432, 21845, 0, 3432, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 5248, 512, 0, 6160, 40960, 0, 6160, 40960, 0, 6176, 40960, 0, 6176, 40960, 0, 6192, 40960, 0, 6192, 40960, 0, 6800, 2, 0, 6816, 2, 0, 6832, 2, 0, 7824, 8192, 0, 7840, 8192, 0, 7856, 8192, 0, 8272, 43520, 0, 8272, 43520, 0, 8272, 43520, 0, 8272, 43520, 0, 8288, 43520, 0, 8288, 43520, 0, 8288, 43520, 0, 8288, 43520, 0, 8304, 43520, 0, 8304, 43520, 0, 8304, 43520, 0, 8304, 43520, 0, 8960, 512, 0, 13504, 1040, 0, 13504, 1040, 0, 15956, 2048, 0, 15960, 2048, 0, 15972, 2048, 0, 15976, 2048, 0, 16916, 256, 0, 16920, 256, 0, 16932, 256, 0, 16936, 256, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 1792, 20480, 0, 1792, 20480, 0, 1808, 20480, 0, 1808, 20480, 0, 1824, 20480, 0, 1824, 20480, 0, 3396, 21845, 0, 3396, 21845, 
0, 3396, 21845, 0, 3396, 21845, 0, 3396, 21845, 0, 3396, 21845, 0, 3396, 21845, 0, 3396, 21845, 0, 3400, 21845, 0, 3400, 21845, 0, 3400, 21845, 0, 3400, 21845, 0, 3400, 21845, 0, 3400, 21845, 0, 3400, 21845, 0, 3400, 21845, 0, 3412, 21845, 0, 3412, 21845, 0, 3412, 21845, 0, 3412, 21845, 0, 3412, 21845, 0, 3412, 21845, 0, 3412, 21845, 0, 3412, 21845, 0, 3416, 21845, 0, 3416, 21845, 0, 3416, 21845, 0, 3416, 21845, 0, 3416, 21845, 0, 3416, 21845, 0, 3416, 21845, 0, 3416, 21845, 0, 3428, 21845, 0, 3428, 21845, 0, 3428, 21845, 0, 3428, 21845, 0, 3428, 21845, 0, 3428, 21845, 0, 3428, 21845, 0, 3428, 21845, 0, 3432, 21845, 0, 3432, 21845, 0, 3432, 21845, 0, 3432, 21845, 0, 3432, 21845, 0, 3432, 21845, 0, 3432, 21845, 0, 3432, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 5248, 512, 0, 6160, 40960, 0, 6160, 40960, 0, 6176, 40960, 0, 6176, 40960, 0, 6192, 40960, 0, 6192, 40960, 0, 6800, 2, 0, 6816, 2, 0, 6832, 2, 0, 7824, 8192, 0, 7840, 8192, 0, 7856, 8192, 0, 8272, 43520, 0, 8272, 43520, 0, 8272, 43520, 0, 8272, 43520, 0, 8288, 43520, 0, 8288, 43520, 0, 8288, 43520, 0, 8288, 43520, 0, 8304, 43520, 0, 8304, 43520, 0, 8304, 43520, 0, 8304, 43520, 0, 8960, 512, 0, 13504, 1040, 0, 13504, 1040, 0, 15956, 2048, 0, 15960, 2048, 0, 15972, 2048, 0, 15976, 2048, 0, 16916, 256, 0, 16920, 256, 0, 16932, 256, 0, 16936, 256, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574942794991225_178_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574942794991225_178_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e34dd080 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574942794991225_178_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,306 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((230 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 414 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 3584, 1, 0, 3588, 1, 0, 3592, 1, 0, 3600, 1, 0, 3604, 1, 0, 3608, 1, 0, 5904, 512, 0, 5920, 512, 0, 7056, 1024, 0, 7072, 1024, 0, 7088, 1024, 0, 7632, 8194, 0, 7632, 8194, 0, 7648, 8194, 0, 7648, 8194, 0, 7664, 8194, 0, 7664, 8194, 0, 7936, 16420, 0, 7936, 16420, 0, 7936, 16420, 0, 8384, 34952, 0, 8384, 34952, 0, 8384, 34952, 0, 8384, 34952, 0, 9024, 73, 0, 9024, 73, 0, 9024, 73, 0, 10240, 8322, 0, 10240, 8322, 0, 10240, 8322, 0, 10256, 8322, 0, 10256, 8322, 0, 10256, 8322, 0, 10272, 8322, 0, 10272, 8322, 0, 10272, 8322, 0, 11264, 1040, 0, 11264, 1040, 0, 11280, 1040, 0, 11280, 1040, 0, 11296, 1040, 0, 11296, 1040, 0, 12544, 8194, 0, 12544, 8194, 0, 12560, 8194, 0, 12560, 8194, 0, 12576, 8194, 0, 12576, 8194, 0, 13120, 1040, 0, 13120, 1040, 0, 13136, 1040, 0, 13136, 1040, 0, 13152, 1040, 0, 13152, 1040, 0, 13760, 36, 0, 13760, 36, 0, 14720, 32, 0, 14736, 32, 0, 14752, 32, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 3584, 1, 0, 3588, 1, 0, 3592, 1, 0, 3600, 1, 0, 3604, 1, 0, 3608, 1, 0, 5904, 512, 0, 5920, 512, 0, 7056, 1024, 0, 7072, 1024, 0, 7088, 1024, 0, 7632, 8194, 0, 7632, 8194, 0, 7648, 8194, 0, 7648, 8194, 0, 7664, 8194, 0, 7664, 8194, 0, 7936, 16420, 0, 7936, 16420, 0, 7936, 16420, 0, 8384, 34952, 0, 8384, 34952, 0, 
8384, 34952, 0, 8384, 34952, 0, 9024, 73, 0, 9024, 73, 0, 9024, 73, 0, 10240, 8322, 0, 10240, 8322, 0, 10240, 8322, 0, 10256, 8322, 0, 10256, 8322, 0, 10256, 8322, 0, 10272, 8322, 0, 10272, 8322, 0, 10272, 8322, 0, 11264, 1040, 0, 11264, 1040, 0, 11280, 1040, 0, 11280, 1040, 0, 11296, 1040, 0, 11296, 1040, 0, 12544, 8194, 0, 12544, 8194, 0, 12560, 8194, 0, 12560, 8194, 0, 12576, 8194, 0, 12576, 8194, 0, 13120, 1040, 0, 13120, 1040, 0, 13136, 1040, 0, 13136, 1040, 0, 13152, 1040, 0, 13152, 1040, 0, 13760, 36, 0, 13760, 36, 0, 14720, 32, 0, 14736, 32, 0, 14752, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574947657028061_179_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574947657028061_179_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..25180ffb --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574947657028061_179_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,564 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 9))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 
3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 5) || 
(WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((317 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((333 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (360 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 15)) { + if 
((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (370 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (377 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (387 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (396 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (409 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14))) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (433 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (450 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (455 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (460 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((479 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((490 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (494 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 606 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4992, 4369, 0, 4992, 4369, 0, 4992, 4369, 0, 4992, 4369, 0, 5008, 4369, 0, 5008, 4369, 0, 5008, 4369, 0, 5008, 4369, 0, 6272, 4369, 0, 6272, 4369, 0, 6272, 4369, 0, 6272, 4369, 0, 7360, 8738, 0, 7360, 8738, 0, 7360, 8738, 0, 7360, 8738, 0, 7376, 8738, 0, 7376, 8738, 0, 7376, 8738, 0, 7376, 8738, 0, 7392, 8738, 0, 7392, 8738, 0, 7392, 8738, 0, 7392, 8738, 0, 9152, 8192, 0, 9168, 8192, 0, 9184, 8192, 0, 9856, 8192, 0, 9872, 8192, 0, 9888, 8192, 0, 10432, 8738, 0, 10432, 8738, 0, 10432, 8738, 0, 10432, 8738, 0, 10448, 8738, 0, 10448, 8738, 0, 10448, 8738, 0, 10448, 8738, 0, 10464, 8738, 0, 10464, 8738, 0, 10464, 8738, 0, 10464, 8738, 0, 11584, 4, 0, 13056, 16388, 0, 13056, 16388, 0, 15872, 34952, 0, 15872, 34952, 0, 15872, 34952, 0, 15872, 34952, 0, 21316, 43690, 0, 21316, 43690, 0, 21316, 43690, 0, 21316, 43690, 0, 21316, 43690, 0, 21316, 43690, 0, 21316, 43690, 0, 21316, 43690, 0, 21320, 43690, 0, 21320, 43690, 0, 21320, 43690, 0, 21320, 43690, 0, 21320, 43690, 0, 21320, 43690, 0, 21320, 43690, 0, 21320, 43690, 0, 21332, 43690, 0, 21332, 43690, 0, 21332, 43690, 0, 21332, 43690, 0, 21332, 43690, 0, 21332, 43690, 0, 21332, 43690, 0, 21332, 43690, 0, 21336, 43690, 0, 21336, 43690, 0, 21336, 43690, 0, 21336, 43690, 0, 21336, 43690, 0, 21336, 43690, 0, 21336, 43690, 0, 21336, 43690, 0, 23040, 
1, 0, 29120, 32, 0, 29440, 26214, 0, 29440, 26214, 0, 29440, 26214, 0, 29440, 26214, 0, 29440, 26214, 0, 29440, 26214, 0, 29440, 26214, 0, 29440, 26214, 0, 30656, 34824, 0, 30656, 34824, 0, 30656, 34824, 0, 30672, 34824, 0, 30672, 34824, 0, 30672, 34824, 0, 31360, 32776, 0, 31360, 32776, 0, 31376, 32776, 0, 31376, 32776, 0, 4992, 4369, 0, 4992, 4369, 0, 4992, 4369, 0, 4992, 4369, 0, 5008, 4369, 0, 5008, 4369, 0, 5008, 4369, 0, 5008, 4369, 0, 6272, 4369, 0, 6272, 4369, 0, 6272, 4369, 0, 6272, 4369, 0, 7360, 8738, 0, 7360, 8738, 0, 7360, 8738, 0, 7360, 8738, 0, 7376, 8738, 0, 7376, 8738, 0, 7376, 8738, 0, 7376, 8738, 0, 7392, 8738, 0, 7392, 8738, 0, 7392, 8738, 0, 7392, 8738, 0, 9152, 8192, 0, 9168, 8192, 0, 9184, 8192, 0, 9856, 8192, 0, 9872, 8192, 0, 9888, 8192, 0, 10432, 8738, 0, 10432, 8738, 0, 10432, 8738, 0, 10432, 8738, 0, 10448, 8738, 0, 10448, 8738, 0, 10448, 8738, 0, 10448, 8738, 0, 10464, 8738, 0, 10464, 8738, 0, 10464, 8738, 0, 10464, 8738, 0, 11584, 4, 0, 13056, 16388, 0, 13056, 16388, 0, 15872, 34952, 0, 15872, 34952, 0, 15872, 34952, 0, 15872, 34952, 0, 21316, 43690, 0, 21316, 43690, 0, 21316, 43690, 0, 21316, 43690, 0, 21316, 43690, 0, 21316, 43690, 0, 21316, 43690, 0, 21316, 43690, 0, 21320, 43690, 0, 21320, 43690, 0, 21320, 43690, 0, 21320, 43690, 0, 21320, 43690, 0, 21320, 43690, 0, 21320, 43690, 0, 21320, 43690, 0, 21332, 43690, 0, 21332, 43690, 0, 21332, 43690, 0, 21332, 43690, 0, 21332, 43690, 0, 21332, 43690, 0, 21332, 43690, 0, 21332, 43690, 0, 21336, 43690, 0, 21336, 43690, 0, 21336, 43690, 0, 21336, 43690, 0, 21336, 43690, 0, 21336, 43690, 0, 21336, 43690, 0, 21336, 43690, 0, 23040, 1, 0, 29120, 32, 0, 29440, 26214, 0, 29440, 26214, 0, 29440, 26214, 0, 29440, 26214, 0, 29440, 26214, 0, 29440, 26214, 0, 29440, 26214, 0, 29440, 26214, 0, 30656, 34824, 0, 30656, 34824, 0, 30656, 34824, 0, 30672, 34824, 0, 30672, 34824, 0, 30672, 34824, 0, 31360, 32776, 0, 31360, 32776, 0, 31376, 32776, 0, 31376, 32776, 0] + - Name: _wave_op_index + Format: 
UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574960725285540_180_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574960725285540_180_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2b6df7c0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574960725285540_180_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,70 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + 
Data: [1360, 2184, 0, 1360, 2184, 0, 1360, 2184, 0, 1376, 2184, 0, 1376, 2184, 0, 1376, 2184, 0, 1392, 2184, 0, 1392, 2184, 0, 1392, 2184, 0, 1360, 2184, 0, 1360, 2184, 0, 1360, 2184, 0, 1376, 2184, 0, 1376, 2184, 0, 1376, 2184, 0, 1392, 2184, 0, 1392, 2184, 0, 1392, 2184, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574979615627814_183_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574979615627814_183_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f9b08462 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574979615627814_183_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,319 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 5))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((78 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((85 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || 
(WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
} + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((264 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((275 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((289 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((296 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((305 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((316 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (349 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((363 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((377 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((384 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (403 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3904, 17, 0, 3904, 17, 0, 3920, 17, 0, 3920, 17, 0, 3936, 17, 0, 3936, 17, 0, 5440, 17, 0, 5440, 17, 0, 5444, 17, 0, 5444, 17, 0, 5456, 17, 0, 5456, 17, 0, 5460, 17, 0, 5460, 17, 0, 5472, 17, 0, 5472, 17, 0, 5476, 17, 0, 5476, 17, 0, 6336, 1, 0, 6352, 1, 0, 6368, 1, 0, 11664, 512, 0, 11680, 
512, 0, 13056, 512, 0, 18964, 4, 0, 18968, 4, 0, 18980, 4, 0, 18984, 4, 0, 20672, 34952, 0, 20672, 34952, 0, 20672, 34952, 0, 20672, 34952, 0, 23248, 512, 0, 23264, 512, 0, 23280, 512, 0, 25792, 512, 0, 3904, 17, 0, 3904, 17, 0, 3920, 17, 0, 3920, 17, 0, 3936, 17, 0, 3936, 17, 0, 5440, 17, 0, 5440, 17, 0, 5444, 17, 0, 5444, 17, 0, 5456, 17, 0, 5456, 17, 0, 5460, 17, 0, 5460, 17, 0, 5472, 17, 0, 5472, 17, 0, 5476, 17, 0, 5476, 17, 0, 6336, 1, 0, 6352, 1, 0, 6368, 1, 0, 11664, 512, 0, 11680, 512, 0, 13056, 512, 0, 18964, 4, 0, 18968, 4, 0, 18980, 4, 0, 18984, 4, 0, 20672, 34952, 0, 20672, 34952, 0, 20672, 34952, 0, 20672, 34952, 0, 23248, 512, 0, 23264, 512, 0, 23280, 512, 0, 25792, 512, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574990062262774_184_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574990062262774_184_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..263f5187 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574990062262774_184_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,196 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = 
(result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((150 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3648, 1025, 0, 3648, 1025, 0, 3392, 57350, 0, 3392, 57350, 0, 3392, 57350, 0, 3392, 57350, 0, 3392, 57350, 0, 3008, 4384, 0, 3008, 4384, 0, 3008, 4384, 0, 4480, 73, 0, 4480, 73, 0, 4480, 73, 0, 11072, 1040, 0, 11072, 1040, 0, 11392, 18724, 0, 11392, 18724, 0, 11392, 18724, 0, 11392, 18724, 0, 11392, 18724, 0, 3648, 1025, 0, 3648, 1025, 0, 3392, 57350, 0, 3392, 57350, 0, 3392, 57350, 0, 
3392, 57350, 0, 3392, 57350, 0, 3008, 4384, 0, 3008, 4384, 0, 3008, 4384, 0, 4480, 73, 0, 4480, 73, 0, 4480, 73, 0, 11072, 1040, 0, 11072, 1040, 0, 11392, 18724, 0, 11392, 18724, 0, 11392, 18724, 0, 11392, 18724, 0, 11392, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574990389103045_185_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574990389103045_185_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0d5de865 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574990389103045_185_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,174 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((WaveGetLaneIndex() >= 11)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2752, 16384, 0, 2368, 43690, 0, 2368, 43690, 0, 2368, 43690, 0, 2368, 43690, 0, 2368, 43690, 0, 2368, 43690, 0, 2368, 43690, 0, 2368, 43690, 0, 1984, 4117, 0, 1984, 4117, 0, 1984, 4117, 0, 1984, 4117, 0, 4928, 43008, 0, 4928, 43008, 0, 4928, 43008, 0, 4944, 43008, 0, 4944, 43008, 0, 4944, 43008, 
0, 4960, 43008, 0, 4960, 43008, 0, 4960, 43008, 0, 2752, 16384, 0, 2368, 43690, 0, 2368, 43690, 0, 2368, 43690, 0, 2368, 43690, 0, 2368, 43690, 0, 2368, 43690, 0, 2368, 43690, 0, 2368, 43690, 0, 1984, 4117, 0, 1984, 4117, 0, 1984, 4117, 0, 1984, 4117, 0, 4928, 43008, 0, 4928, 43008, 0, 4928, 43008, 0, 4944, 43008, 0, 4944, 43008, 0, 4944, 43008, 0, 4960, 43008, 0, 4960, 43008, 0, 4960, 43008, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756574994238008039_188_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756574994238008039_188_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7baccff8 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756574994238008039_188_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,171 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((13 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((73 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((80 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (counter0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((139 << 6) | (counter0 << 4)) | (counter2 << 2)) | counter3); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((154 << 6) | (counter0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + if ((counter0 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 396 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [848, 8, 0, 864, 8, 0, 3024, 8704, 0, 3024, 8704, 0, 3040, 8704, 0, 3040, 8704, 0, 4052, 8704, 0, 4052, 8704, 0, 4056, 8704, 0, 4056, 8704, 0, 4060, 8704, 0, 4060, 8704, 0, 4068, 8704, 0, 4068, 8704, 0, 4072, 8704, 0, 4072, 8704, 0, 4076, 8704, 0, 4076, 8704, 0, 5840, 32770, 0, 5840, 32770, 0, 5856, 32770, 0, 5856, 32770, 0, 6996, 4104, 0, 6996, 4104, 0, 7000, 4104, 0, 7000, 4104, 0, 7004, 4104, 0, 7004, 4104, 0, 7012, 4104, 0, 7012, 4104, 0, 7016, 4104, 0, 7016, 4104, 0, 7020, 4104, 0, 7020, 4104, 0, 8917, 36, 0, 8917, 36, 0, 8918, 36, 0, 8918, 36, 0, 8921, 36, 0, 8921, 36, 0, 8922, 36, 0, 8922, 36, 0, 8925, 36, 0, 8925, 36, 0, 8926, 36, 0, 8926, 36, 0, 8933, 36, 0, 8933, 36, 0, 8934, 36, 0, 8934, 36, 0, 8937, 36, 0, 8937, 36, 0, 8938, 36, 0, 8938, 36, 0, 8941, 36, 0, 8941, 36, 0, 8942, 36, 0, 8942, 36, 0, 9876, 8, 0, 9880, 8, 0, 9884, 8, 0, 9892, 8, 0, 9896, 8, 0, 9900, 8, 0, 10320, 512, 0, 10336, 512, 0, 848, 8, 0, 864, 8, 0, 3024, 8704, 0, 3024, 8704, 0, 3040, 8704, 0, 3040, 8704, 0, 4052, 8704, 0, 4052, 8704, 0, 4056, 8704, 0, 4056, 8704, 0, 4060, 8704, 0, 4060, 8704, 0, 4068, 8704, 0, 4068, 8704, 0, 4072, 8704, 0, 4072, 8704, 0, 4076, 8704, 0, 4076, 8704, 0, 5840, 32770, 0, 5840, 32770, 0, 5856, 32770, 0, 5856, 32770, 0, 6996, 4104, 0, 6996, 4104, 0, 7000, 4104, 0, 7000, 4104, 0, 7004, 4104, 0, 7004, 4104, 0, 7012, 4104, 0, 7012, 4104, 0, 7016, 4104, 0, 7016, 4104, 0, 7020, 4104, 0, 7020, 4104, 0, 8917, 36, 0, 8917, 36, 0, 8918, 36, 0, 8918, 36, 0, 8921, 36, 0, 8921, 36, 0, 8922, 36, 0, 8922, 36, 0, 8925, 36, 0, 8925, 36, 0, 8926, 36, 0, 8926, 36, 0, 8933, 36, 0, 8933, 36, 0, 8934, 36, 0, 8934, 36, 0, 8937, 36, 0, 
8937, 36, 0, 8938, 36, 0, 8938, 36, 0, 8941, 36, 0, 8941, 36, 0, 8942, 36, 0, 8942, 36, 0, 9876, 8, 0, 9880, 8, 0, 9884, 8, 0, 9892, 8, 0, 9896, 8, 0, 9900, 8, 0, 10320, 512, 0, 10336, 512, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575044200781220_189_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575044200781220_189_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..429b167b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575044200781220_189_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,175 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if 
((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 64519, 0, 1344, 64519, 0, 1344, 64519, 0, 1344, 64519, 0, 1344, 64519, 0, 1344, 64519, 0, 1344, 64519, 0, 1344, 64519, 0, 1344, 64519, 0, 960, 336, 0, 960, 336, 0, 960, 336, 0, 2176, 73, 0, 2176, 73, 0, 2176, 73, 0, 5888, 1040, 0, 5888, 1040, 0, 6208, 18724, 0, 6208, 18724, 0, 6208, 18724, 0, 6208, 18724, 0, 6208, 18724, 0, 1344, 64519, 0, 1344, 64519, 0, 1344, 64519, 0, 1344, 64519, 0, 1344, 64519, 0, 1344, 64519, 0, 1344, 64519, 0, 1344, 64519, 0, 1344, 64519, 0, 960, 336, 0, 960, 336, 0, 960, 336, 0, 2176, 73, 0, 2176, 73, 0, 2176, 73, 0, 5888, 1040, 0, 5888, 1040, 0, 6208, 18724, 0, 6208, 18724, 0, 6208, 18724, 0, 6208, 18724, 0, 6208, 18724, 0] + - Name: _wave_op_index + 
Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575044391504921_190_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575044391504921_190_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..385316fa --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575044391504921_190_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 7)) { + 
result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3840, 16898, 0, 3840, 16898, 0, 3840, 16898, 0, 3584, 5, 0, 3584, 5, 0, 3200, 128, 0, 2816, 32768, 0, 2560, 5184, 0, 2560, 5184, 0, 2560, 5184, 0, 3840, 16898, 0, 3840, 16898, 0, 3840, 16898, 0, 3584, 5, 0, 3584, 5, 0, 3200, 128, 0, 2816, 32768, 0, 2560, 5184, 0, 2560, 5184, 0, 2560, 5184, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + 
Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575047787309519_192_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575047787309519_192_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e1d62546 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575047787309519_192_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,180 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((33 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 282 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2896, 4369, 0, 2896, 4369, 0, 2896, 4369, 0, 2896, 4369, 0, 2912, 4369, 0, 2912, 4369, 0, 2912, 4369, 0, 2912, 4369, 0, 3456, 4369, 0, 3456, 4369, 0, 3456, 4369, 0, 3456, 4369, 0, 3776, 30583, 0, 3776, 30583, 0, 3776, 30583, 0, 3776, 30583, 0, 3776, 30583, 0, 3776, 30583, 0, 3776, 30583, 0, 3776, 30583, 0, 3776, 30583, 0, 3776, 30583, 0, 3776, 30583, 0, 3776, 30583, 0, 6336, 223, 0, 6336, 223, 0, 6336, 223, 0, 6336, 223, 0, 6336, 223, 0, 6336, 223, 0, 6336, 223, 0, 7376, 21845, 0, 7376, 21845, 0, 7376, 21845, 0, 7376, 21845, 0, 7376, 21845, 0, 7376, 21845, 0, 7376, 21845, 0, 7376, 21845, 0, 7392, 21845, 0, 7392, 21845, 0, 7392, 21845, 0, 7392, 21845, 0, 7392, 21845, 0, 7392, 21845, 0, 7392, 21845, 0, 7392, 21845, 0, 2896, 4369, 0, 2896, 4369, 0, 2896, 4369, 0, 2896, 4369, 0, 2912, 4369, 0, 2912, 4369, 0, 2912, 4369, 0, 2912, 4369, 0, 3456, 4369, 0, 3456, 4369, 0, 3456, 4369, 0, 3456, 4369, 0, 3776, 30583, 0, 3776, 30583, 0, 3776, 30583, 0, 3776, 30583, 0, 3776, 30583, 0, 3776, 30583, 0, 3776, 30583, 0, 
3776, 30583, 0, 3776, 30583, 0, 3776, 30583, 0, 3776, 30583, 0, 3776, 30583, 0, 6336, 223, 0, 6336, 223, 0, 6336, 223, 0, 6336, 223, 0, 6336, 223, 0, 6336, 223, 0, 6336, 223, 0, 7376, 21845, 0, 7376, 21845, 0, 7376, 21845, 0, 7376, 21845, 0, 7376, 21845, 0, 7376, 21845, 0, 7376, 21845, 0, 7376, 21845, 0, 7392, 21845, 0, 7392, 21845, 0, 7392, 21845, 0, 7392, 21845, 0, 7392, 21845, 0, 7392, 21845, 0, 7392, 21845, 0, 7392, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575050583393852_193_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575050583393852_193_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9de93876 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575050583393852_193_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,173 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1856, 1024, 0, 2176, 16644, 0, 2176, 16644, 0, 2176, 16644, 0, 5696, 32, 0, 6400, 43018, 0, 6400, 43018, 0, 6400, 43018, 0, 6400, 43018, 0, 6400, 43018, 0, 768, 65, 0, 768, 65, 0, 1856, 1024, 0, 2176, 16644, 0, 2176, 16644, 0, 2176, 16644, 0, 5696, 32, 0, 6400, 43018, 0, 6400, 43018, 0, 6400, 43018, 0, 6400, 43018, 0, 6400, 43018, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575077004382042_195_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575077004382042_195_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1ecaa984 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575077004382042_195_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2304, 64527, 0, 2304, 64527, 0, 2304, 64527, 0, 2304, 64527, 0, 2304, 64527, 0, 2304, 64527, 0, 2304, 64527, 0, 2304, 64527, 0, 2304, 64527, 0, 2304, 64527, 0, 1664, 160, 0, 1664, 160, 0, 4288, 64527, 0, 4288, 64527, 0, 4288, 64527, 0, 4288, 64527, 0, 4288, 64527, 0, 4288, 64527, 0, 4288, 64527, 0, 4288, 64527, 0, 4288, 64527, 0, 4288, 64527, 0, 2304, 64527, 0, 2304, 64527, 0, 2304, 64527, 0, 2304, 64527, 0, 2304, 64527, 0, 2304, 64527, 0, 2304, 
64527, 0, 2304, 64527, 0, 2304, 64527, 0, 2304, 64527, 0, 1664, 160, 0, 1664, 160, 0, 4288, 64527, 0, 4288, 64527, 0, 4288, 64527, 0, 4288, 64527, 0, 4288, 64527, 0, 4288, 64527, 0, 4288, 64527, 0, 4288, 64527, 0, 4288, 64527, 0, 4288, 64527, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575077157625591_196_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575077157625591_196_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..611a5f6d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575077157625591_196_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,405 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((251 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter5 == 1)) { + break; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 9))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((327 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((334 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 1)) { + continue; + } + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (344 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (354 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (363 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (367 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 8)) { + 
result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (374 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (381 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1920, 4097, 0, 1920, 4097, 0, 1936, 4097, 0, 1936, 4097, 0, 2496, 272, 0, 2496, 272, 0, 2512, 272, 0, 2512, 272, 0, 3200, 17, 0, 3200, 17, 0, 3216, 17, 0, 3216, 17, 0, 4608, 8192, 0, 4624, 8192, 0, 4640, 8192, 0, 5312, 8192, 0, 5328, 8192, 0, 5344, 8192, 0, 10176, 4, 0, 12992, 8, 0, 14656, 17, 0, 14656, 17, 0, 16080, 2, 0, 18000, 8194, 0, 18000, 8194, 0, 22656, 68, 0, 22656, 68, 0, 23936, 68, 0, 23936, 68, 0, 24384, 34952, 0, 24384, 34952, 0, 24384, 34952, 0, 24384, 34952, 0, 1920, 4097, 0, 1920, 4097, 0, 1936, 4097, 0, 1936, 4097, 0, 2496, 272, 0, 2496, 272, 0, 2512, 272, 0, 2512, 272, 0, 3200, 17, 0, 3200, 17, 0, 3216, 17, 0, 3216, 17, 0, 4608, 8192, 0, 4624, 8192, 0, 4640, 8192, 0, 5312, 8192, 0, 5328, 8192, 0, 5344, 8192, 0, 10176, 4, 0, 12992, 8, 0, 14656, 17, 0, 14656, 17, 0, 16080, 2, 0, 18000, 8194, 0, 18000, 8194, 0, 22656, 68, 0, 22656, 68, 0, 23936, 68, 0, 23936, 68, 0, 24384, 34952, 0, 24384, 34952, 0, 24384, 34952, 0, 24384, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575086854816341_197_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575086854816341_197_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..23fc721b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575086854816341_197_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,208 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 13)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((52 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result 
= (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 276 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 4096, 0, 1104, 4096, 0, 1120, 4096, 0, 4544, 4369, 0, 4544, 4369, 0, 4544, 4369, 0, 4544, 4369, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5952, 85, 0, 5952, 85, 0, 5952, 85, 0, 5952, 85, 0, 6592, 8, 0, 9744, 130, 0, 9744, 130, 0, 9760, 130, 0, 9760, 130, 0, 9776, 130, 0, 9776, 130, 0, 1088, 4096, 0, 1104, 4096, 0, 1120, 4096, 0, 4544, 4369, 0, 4544, 4369, 0, 4544, 4369, 0, 4544, 4369, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 4864, 30583, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5312, 65535, 0, 5952, 85, 0, 5952, 85, 0, 5952, 85, 0, 5952, 85, 0, 6592, 8, 0, 9744, 130, 0, 9744, 130, 0, 9760, 130, 0, 9760, 130, 0, 9776, 130, 0, 9776, 130, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575087575379963_198_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575087575379963_198_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3cd4412c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575087575379963_198_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,143 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 17, 0, 1216, 17, 0, 1216, 17, 0, 1216, 17, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575087698600998_199_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575087698600998_199_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1ac3598a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575087698600998_199_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,393 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || 
(WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((67 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((76 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if 
((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 3: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((237 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((244 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 2) || 
(WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((255 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (308 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9))) { + result = 
(result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((327 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((342 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (356 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (366 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (377 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (382 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 306 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 6720, 1024, 0, 9296, 16388, 0, 9296, 16388, 0, 9312, 16388, 0, 9312, 16388, 0, 15184, 17, 0, 15184, 17, 0, 15188, 17, 0, 15188, 17, 0, 15192, 17, 0, 15192, 17, 0, 15200, 17, 0, 15200, 17, 0, 15204, 17, 0, 15204, 17, 0, 15208, 17, 0, 15208, 17, 0, 15632, 4352, 0, 15632, 4352, 0, 15636, 4352, 0, 15636, 4352, 0, 15640, 4352, 0, 15640, 4352, 0, 15648, 4352, 0, 15648, 4352, 0, 15652, 4352, 0, 15652, 4352, 0, 15656, 4352, 0, 15656, 4352, 0, 17408, 17476, 0, 17408, 17476, 0, 17408, 17476, 0, 17408, 17476, 0, 17856, 34952, 0, 17856, 34952, 0, 17856, 34952, 0, 17856, 34952, 0, 20928, 576, 0, 20928, 576, 0, 20944, 576, 0, 20944, 576, 0, 21888, 64, 0, 21904, 64, 0, 23424, 2, 0, 24448, 18724, 0, 24448, 18724, 0, 24448, 18724, 0, 24448, 18724, 0, 24448, 18724, 0, 576, 17, 0, 576, 17, 0, 6720, 1024, 0, 9296, 16388, 0, 9296, 16388, 0, 9312, 16388, 0, 9312, 16388, 0, 15184, 17, 0, 15184, 17, 0, 15188, 17, 0, 15188, 17, 0, 15192, 17, 0, 15192, 17, 0, 15200, 17, 0, 15200, 17, 0, 15204, 17, 0, 15204, 17, 0, 15208, 17, 0, 15208, 17, 0, 15632, 4352, 0, 15632, 4352, 0, 15636, 4352, 0, 15636, 4352, 0, 15640, 4352, 0, 15640, 4352, 0, 15648, 4352, 0, 15648, 4352, 0, 15652, 4352, 0, 15652, 4352, 0, 15656, 4352, 0, 15656, 4352, 0, 17408, 17476, 0, 17408, 17476, 0, 17408, 17476, 0, 17408, 17476, 0, 17856, 34952, 0, 17856, 34952, 0, 17856, 34952, 0, 17856, 34952, 0, 20928, 576, 0, 20928, 576, 0, 20944, 576, 0, 20944, 576, 0, 21888, 64, 0, 21904, 64, 0, 23424, 2, 0, 24448, 18724, 0, 24448, 18724, 0, 24448, 18724, 0, 24448, 18724, 0, 24448, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + 
- Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575107251819709_200_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575107251819709_200_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bbc8d6fb --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575107251819709_200_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,300 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 0))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 2))) { + 
result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 6))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((236 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((245 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 1)) { + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || 
(WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((292 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 7))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((322 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((340 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((366 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (380 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 294 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1744, 1, 0, 1760, 1, 0, 1776, 1, 0, 2384, 1, 0, 2400, 1, 0, 2416, 1, 0, 3984, 1, 0, 4000, 1, 0, 4016, 1, 0, 12480, 17540, 0, 12480, 17540, 0, 12480, 17540, 0, 12480, 17540, 0, 14160, 10370, 0, 14160, 10370, 0, 14160, 10370, 0, 14160, 10370, 0, 15120, 63, 0, 15120, 63, 0, 15120, 63, 0, 15120, 63, 0, 15120, 63, 0, 15120, 63, 0, 15124, 63, 0, 15124, 63, 0, 15124, 63, 0, 15124, 63, 0, 15124, 63, 0, 15124, 63, 0, 15128, 63, 0, 15128, 63, 0, 15128, 63, 0, 15128, 63, 0, 15128, 63, 0, 15128, 63, 0, 15696, 7, 0, 15696, 7, 0, 15696, 7, 0, 15700, 7, 0, 15700, 7, 0, 15700, 7, 0, 15704, 7, 0, 15704, 7, 0, 15704, 7, 0, 18704, 4416, 0, 18704, 4416, 0, 18704, 4416, 0, 23440, 4352, 0, 23440, 4352, 0, 1744, 1, 0, 1760, 1, 0, 1776, 1, 0, 2384, 1, 0, 2400, 1, 0, 2416, 1, 0, 3984, 1, 0, 4000, 1, 0, 4016, 1, 0, 12480, 17540, 0, 12480, 17540, 0, 12480, 17540, 0, 12480, 17540, 0, 14160, 10370, 0, 14160, 10370, 0, 14160, 10370, 0, 14160, 10370, 0, 15120, 63, 0, 15120, 63, 0, 15120, 63, 0, 15120, 63, 0, 15120, 63, 0, 15120, 63, 0, 15124, 63, 0, 15124, 63, 0, 15124, 63, 0, 15124, 63, 0, 15124, 63, 0, 15124, 63, 0, 15128, 63, 0, 15128, 63, 0, 15128, 63, 0, 15128, 63, 0, 15128, 63, 0, 15128, 63, 0, 15696, 7, 0, 15696, 7, 0, 15696, 7, 0, 15700, 7, 0, 15700, 7, 0, 15700, 7, 0, 15704, 7, 0, 15704, 7, 0, 15704, 7, 0, 18704, 4416, 0, 18704, 4416, 0, 18704, 4416, 0, 23440, 4352, 0, 23440, 4352, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575153863084318_202_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575153863084318_202_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a33357df --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575153863084318_202_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,69 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + 
- Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575153939855997_203_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575153939855997_203_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87cd25f4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575153939855997_203_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575154165779274_205_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575154165779274_205_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d12067b8 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575154165779274_205_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,129 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (i0 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 168 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 4096, 0, 1616, 4096, 0, 1632, 4096, 0, 3712, 4097, 0, 3712, 4097, 0, 3716, 4097, 0, 3716, 4097, 0, 3720, 4097, 0, 3720, 4097, 0, 3728, 4097, 0, 3728, 4097, 0, 3732, 4097, 0, 3732, 4097, 0, 3736, 4097, 0, 3736, 4097, 0, 3744, 4097, 0, 3744, 4097, 0, 3748, 4097, 0, 3748, 4097, 0, 3752, 4097, 0, 3752, 4097, 0, 4288, 1040, 0, 4288, 1040, 0, 4608, 18724, 0, 4608, 18724, 0, 4608, 18724, 0, 4608, 18724, 0, 4608, 18724, 0, 1600, 4096, 0, 1616, 4096, 0, 1632, 4096, 0, 3712, 4097, 0, 3712, 4097, 0, 3716, 4097, 0, 3716, 4097, 0, 3720, 4097, 0, 3720, 4097, 0, 3728, 4097, 0, 3728, 4097, 0, 3732, 4097, 0, 3732, 4097, 0, 3736, 4097, 0, 3736, 4097, 0, 3744, 4097, 0, 3744, 4097, 0, 3748, 4097, 0, 3748, 4097, 0, 3752, 4097, 0, 3752, 4097, 0, 4288, 1040, 0, 4288, 1040, 0, 4608, 18724, 0, 4608, 18724, 0, 4608, 18724, 0, 4608, 18724, 0, 4608, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575155265206173_206_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575155265206173_206_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..04188fb7 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575155265206173_206_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,220 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 0))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((153 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11))) { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((210 << 6) | (counter2 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((219 << 6) | (counter2 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || 
(WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 570 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 2000, 3, 0, 2000, 3, 0, 3216, 32783, 0, 3216, 32783, 0, 3216, 32783, 0, 3216, 32783, 0, 3216, 32783, 0, 3220, 32783, 0, 3220, 32783, 0, 3220, 32783, 0, 3220, 32783, 0, 3220, 32783, 0, 3920, 57359, 0, 3920, 57359, 0, 3920, 57359, 0, 3920, 57359, 0, 3920, 57359, 0, 3920, 57359, 0, 3920, 57359, 0, 3924, 57359, 0, 3924, 57359, 0, 3924, 57359, 0, 3924, 57359, 0, 3924, 57359, 0, 3924, 57359, 0, 3924, 57359, 0, 4800, 32769, 0, 4800, 32769, 0, 7440, 2049, 0, 7440, 2049, 0, 7456, 2049, 0, 7456, 2049, 0, 7472, 2049, 0, 
7472, 2049, 0, 8336, 2176, 0, 8336, 2176, 0, 8352, 2176, 0, 8352, 2176, 0, 8368, 2176, 0, 8368, 2176, 0, 11280, 1, 0, 11296, 1, 0, 11312, 1, 0, 11728, 8192, 0, 11744, 8192, 0, 11760, 8192, 0, 13460, 64, 0, 13464, 64, 0, 13468, 64, 0, 13476, 64, 0, 13480, 64, 0, 13484, 64, 0, 13492, 64, 0, 13496, 64, 0, 13500, 64, 0, 14036, 8, 0, 14040, 8, 0, 14044, 8, 0, 14052, 8, 0, 14056, 8, 0, 14060, 8, 0, 14068, 8, 0, 14072, 8, 0, 14076, 8, 0, 15248, 8, 0, 15264, 8, 0, 15280, 8, 0, 15696, 16384, 0, 15712, 16384, 0, 15728, 16384, 0, 16400, 61443, 0, 16400, 61443, 0, 16400, 61443, 0, 16400, 61443, 0, 16400, 61443, 0, 16400, 61443, 0, 16416, 61443, 0, 16416, 61443, 0, 16416, 61443, 0, 16416, 61443, 0, 16416, 61443, 0, 16416, 61443, 0, 16432, 61443, 0, 16432, 61443, 0, 16432, 61443, 0, 16432, 61443, 0, 16432, 61443, 0, 16432, 61443, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 2000, 3, 0, 2000, 3, 0, 3216, 32783, 0, 3216, 32783, 0, 3216, 32783, 0, 3216, 32783, 0, 3216, 32783, 0, 3220, 32783, 0, 3220, 32783, 0, 3220, 32783, 0, 3220, 32783, 0, 3220, 32783, 0, 3920, 57359, 0, 3920, 57359, 0, 3920, 57359, 0, 3920, 57359, 0, 3920, 57359, 0, 3920, 57359, 0, 3920, 57359, 0, 3924, 57359, 0, 3924, 57359, 0, 3924, 57359, 0, 3924, 57359, 0, 3924, 57359, 0, 3924, 57359, 0, 3924, 57359, 0, 4800, 32769, 0, 4800, 32769, 0, 7440, 2049, 0, 7440, 2049, 0, 7456, 2049, 0, 7456, 2049, 0, 7472, 2049, 0, 7472, 2049, 0, 8336, 2176, 0, 8336, 2176, 0, 8352, 2176, 0, 8352, 2176, 0, 8368, 2176, 0, 8368, 2176, 0, 11280, 1, 0, 11296, 1, 0, 11312, 1, 0, 11728, 8192, 0, 11744, 8192, 0, 11760, 8192, 0, 13460, 64, 0, 13464, 64, 0, 13468, 64, 0, 13476, 64, 0, 13480, 64, 0, 13484, 64, 0, 13492, 64, 0, 13496, 64, 0, 13500, 64, 0, 14036, 8, 0, 14040, 8, 0, 14044, 8, 0, 14052, 8, 0, 14056, 8, 0, 14060, 8, 0, 14068, 8, 0, 14072, 8, 0, 14076, 8, 0, 15248, 8, 0, 15264, 8, 0, 15280, 8, 0, 15696, 16384, 0, 15712, 16384, 0, 15728, 16384, 0, 16400, 61443, 0, 
16400, 61443, 0, 16400, 61443, 0, 16400, 61443, 0, 16400, 61443, 0, 16400, 61443, 0, 16416, 61443, 0, 16416, 61443, 0, 16416, 61443, 0, 16416, 61443, 0, 16416, 61443, 0, 16416, 61443, 0, 16432, 61443, 0, 16432, 61443, 0, 16432, 61443, 0, 16432, 61443, 0, 16432, 61443, 0, 16432, 61443, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575176148357054_209_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575176148357054_209_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7c3844bf --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575176148357054_209_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,349 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + 
if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || 
(WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 1))) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((206 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((225 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((260 << 6) | (i2 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + case 1: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((279 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((294 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((304 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((313 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((i6 == 1)) { + continue; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 576 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1344, 1040, 0, 1344, 1040, 0, 2256, 16640, 0, 2256, 16640, 0, 2272, 16640, 0, 2272, 16640, 0, 2288, 16640, 0, 2288, 16640, 0, 7744, 21845, 0, 7744, 21845, 0, 7744, 21845, 0, 7744, 21845, 0, 7744, 21845, 0, 7744, 21845, 0, 7744, 21845, 0, 7744, 21845, 0, 8384, 17, 0, 8384, 17, 0, 9280, 17476, 0, 9280, 17476, 0, 9280, 17476, 0, 9280, 17476, 0, 10304, 21845, 0, 10304, 21845, 0, 10304, 21845, 0, 10304, 21845, 0, 10304, 21845, 0, 10304, 21845, 0, 10304, 21845, 0, 10304, 21845, 0, 15232, 16405, 0, 15232, 16405, 0, 15232, 16405, 0, 15232, 16405, 0, 15248, 16405, 0, 15248, 16405, 0, 15248, 16405, 0, 15248, 16405, 0, 15264, 16405, 0, 15264, 16405, 0, 15264, 16405, 0, 15264, 16405, 0, 16644, 4, 0, 16648, 4, 0, 16660, 4, 0, 16664, 4, 0, 16676, 4, 0, 16680, 4, 0, 17856, 21845, 0, 17856, 21845, 0, 17856, 21845, 0, 17856, 21845, 0, 17856, 21845, 0, 17856, 21845, 0, 17856, 21845, 0, 17856, 21845, 0, 17872, 21845, 0, 17872, 21845, 0, 17872, 21845, 0, 17872, 21845, 0, 17872, 21845, 0, 17872, 21845, 0, 
17872, 21845, 0, 17872, 21845, 0, 18816, 32768, 0, 18820, 32768, 0, 18824, 32768, 0, 18832, 32768, 0, 18836, 32768, 0, 18840, 32768, 0, 19456, 85, 0, 19456, 85, 0, 19456, 85, 0, 19456, 85, 0, 19460, 85, 0, 19460, 85, 0, 19460, 85, 0, 19460, 85, 0, 19464, 85, 0, 19464, 85, 0, 19464, 85, 0, 19464, 85, 0, 19472, 85, 0, 19472, 85, 0, 19472, 85, 0, 19472, 85, 0, 19476, 85, 0, 19476, 85, 0, 19476, 85, 0, 19476, 85, 0, 19480, 85, 0, 19480, 85, 0, 19480, 85, 0, 19480, 85, 0, 768, 65, 0, 768, 65, 0, 1344, 1040, 0, 1344, 1040, 0, 2256, 16640, 0, 2256, 16640, 0, 2272, 16640, 0, 2272, 16640, 0, 2288, 16640, 0, 2288, 16640, 0, 7744, 21845, 0, 7744, 21845, 0, 7744, 21845, 0, 7744, 21845, 0, 7744, 21845, 0, 7744, 21845, 0, 7744, 21845, 0, 7744, 21845, 0, 8384, 17, 0, 8384, 17, 0, 9280, 17476, 0, 9280, 17476, 0, 9280, 17476, 0, 9280, 17476, 0, 10304, 21845, 0, 10304, 21845, 0, 10304, 21845, 0, 10304, 21845, 0, 10304, 21845, 0, 10304, 21845, 0, 10304, 21845, 0, 10304, 21845, 0, 15232, 16405, 0, 15232, 16405, 0, 15232, 16405, 0, 15232, 16405, 0, 15248, 16405, 0, 15248, 16405, 0, 15248, 16405, 0, 15248, 16405, 0, 15264, 16405, 0, 15264, 16405, 0, 15264, 16405, 0, 15264, 16405, 0, 16644, 4, 0, 16648, 4, 0, 16660, 4, 0, 16664, 4, 0, 16676, 4, 0, 16680, 4, 0, 17856, 21845, 0, 17856, 21845, 0, 17856, 21845, 0, 17856, 21845, 0, 17856, 21845, 0, 17856, 21845, 0, 17856, 21845, 0, 17856, 21845, 0, 17872, 21845, 0, 17872, 21845, 0, 17872, 21845, 0, 17872, 21845, 0, 17872, 21845, 0, 17872, 21845, 0, 17872, 21845, 0, 17872, 21845, 0, 18816, 32768, 0, 18820, 32768, 0, 18824, 32768, 0, 18832, 32768, 0, 18836, 32768, 0, 18840, 32768, 0, 19456, 85, 0, 19456, 85, 0, 19456, 85, 0, 19456, 85, 0, 19460, 85, 0, 19460, 85, 0, 19460, 85, 0, 19460, 85, 0, 19464, 85, 0, 19464, 85, 0, 19464, 85, 0, 19464, 85, 0, 19472, 85, 0, 19472, 85, 0, 19472, 85, 0, 19472, 85, 0, 19476, 85, 0, 19476, 85, 0, 19476, 85, 0, 19476, 85, 0, 19480, 85, 0, 19480, 85, 0, 19480, 85, 0, 19480, 85, 0] + - Name: _wave_op_index + 
Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575276134356843_212_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575276134356843_212_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ff67423f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575276134356843_212_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,170 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 3, 0, 832, 3, 0, 576, 8192, 0, 1472, 85, 0, 1472, 85, 0, 1472, 85, 0, 1472, 85, 0, 2576, 8, 0, 3152, 32768, 0, 832, 3, 0, 832, 3, 0, 576, 8192, 0, 1472, 85, 0, 1472, 85, 0, 1472, 85, 0, 1472, 85, 0, 2576, 8, 0, 3152, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575276413184408_213_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575276413184408_213_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e6d7fede --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575276413184408_213_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,139 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 2512, 34952, 0, 2512, 34952, 0, 2512, 34952, 0, 2512, 34952, 0, 576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 2512, 34952, 0, 2512, 34952, 0, 2512, 34952, 0, 2512, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + 
Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575276617209242_214_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575276617209242_214_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..855e9924 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575276617209242_214_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,166 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || 
(WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [8640, 2048, 0, 8640, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575276747052844_215_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575276747052844_215_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d7c5de84 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575276747052844_215_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,125 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 7)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 128, 0, 1056, 128, 0, 1072, 128, 0, 1040, 128, 0, 1056, 128, 0, 1072, 128, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575289114879574_217_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575289114879574_217_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7c72726d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575289114879574_217_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,231 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((122 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((137 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 2)) { + break; + } + } + break; + } + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() >= 10)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((201 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((210 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((219 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((234 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 49155, 0, 1088, 49155, 0, 1088, 49155, 0, 1088, 49155, 0, 4736, 16, 0, 5632, 64, 0, 10624, 2, 0, 11664, 8194, 0, 11664, 8194, 0, 11680, 8194, 0, 11680, 8194, 0, 11696, 8194, 0, 11696, 8194, 0, 13460, 2, 0, 13464, 2, 0, 13468, 2, 0, 13476, 2, 0, 13480, 2, 0, 13484, 2, 0, 13492, 2, 0, 13496, 2, 0, 13500, 2, 0, 14036, 2, 0, 14040, 2, 0, 14044, 2, 0, 14052, 2, 0, 14056, 2, 0, 14060, 2, 0, 14068, 2, 0, 14072, 2, 0, 14076, 2, 0, 1088, 49155, 0, 1088, 49155, 0, 1088, 49155, 0, 1088, 49155, 0, 4736, 16, 0, 5632, 64, 0, 10624, 2, 0, 11664, 8194, 0, 11664, 8194, 0, 11680, 8194, 0, 11680, 8194, 0, 11696, 8194, 0, 11696, 8194, 0, 13460, 2, 0, 13464, 2, 0, 13468, 2, 0, 13476, 2, 0, 13480, 2, 0, 13484, 2, 0, 13492, 2, 0, 13496, 2, 0, 13500, 2, 0, 14036, 2, 0, 14040, 2, 0, 14044, 2, 0, 14052, 2, 0, 14056, 2, 0, 14060, 2, 0, 14068, 2, 0, 14072, 2, 0, 14076, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: 
[0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575290156882421_218_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575290156882421_218_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..85e60b5a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575290156882421_218_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,234 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 7))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((118 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (counter2 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 16384, 0, 3648, 256, 0, 5120, 32769, 0, 5120, 32769, 0, 6928, 1024, 0, 6944, 1024, 0, 8144, 1024, 0, 8160, 1024, 0, 8720, 8192, 0, 8736, 8192, 0, 12736, 260, 0, 12736, 260, 0, 12752, 260, 0, 12752, 260, 0, 1856, 16384, 0, 3648, 256, 0, 5120, 32769, 0, 5120, 32769, 0, 6928, 1024, 0, 6944, 1024, 0, 8144, 1024, 0, 8160, 1024, 0, 8720, 8192, 0, 8736, 8192, 0, 12736, 260, 0, 12736, 260, 0, 12752, 260, 0, 12752, 260, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575291584826449_219_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575291584826449_219_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..054a2972 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575291584826449_219_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,276 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 14)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 7))) { + if ((((((WaveGetLaneIndex() == 2) || 
(WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [10560, 17, 0, 10560, 17, 0, 11664, 512, 0, 11680, 512, 0, 15360, 32, 0, 15376, 32, 0, 15680, 17476, 0, 15680, 17476, 0, 15680, 17476, 0, 15680, 17476, 0, 16128, 34952, 0, 16128, 34952, 0, 16128, 34952, 0, 16128, 34952, 0, 10560, 17, 0, 10560, 17, 0, 11664, 512, 0, 11680, 512, 0, 15360, 32, 0, 15376, 32, 0, 15680, 17476, 0, 15680, 17476, 0, 15680, 17476, 0, 15680, 17476, 0, 16128, 34952, 0, 16128, 34952, 0, 16128, 34952, 0, 16128, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575291875822074_220_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575291875822074_220_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ff717d1b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575291875822074_220_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,212 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 3712, 2, 0, 5248, 32, 0, 5824, 17476, 0, 5824, 17476, 0, 5824, 17476, 0, 5824, 17476, 0, 6272, 34952, 0, 6272, 34952, 0, 6272, 34952, 0, 6272, 34952, 0, 576, 17, 0, 576, 17, 0, 3712, 2, 0, 5248, 32, 0, 5824, 17476, 0, 5824, 17476, 0, 5824, 17476, 0, 5824, 17476, 0, 6272, 34952, 0, 6272, 34952, 0, 6272, 34952, 0, 6272, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575292059815107_221_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575292059815107_221_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..602d519f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575292059815107_221_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,245 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 450 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1616, 4369, 0, 1616, 4369, 0, 1616, 4369, 0, 1616, 4369, 0, 1632, 4369, 0, 1632, 4369, 0, 1632, 4369, 0, 1632, 4369, 0, 2772, 4369, 0, 2772, 4369, 0, 2772, 4369, 0, 2772, 4369, 0, 2776, 4369, 0, 2776, 4369, 0, 2776, 4369, 0, 2776, 4369, 0, 2780, 4369, 0, 2780, 4369, 0, 2780, 4369, 0, 2780, 4369, 0, 2788, 4369, 0, 2788, 4369, 0, 2788, 4369, 0, 2788, 4369, 0, 2792, 4369, 0, 2792, 4369, 0, 2792, 4369, 0, 2792, 4369, 0, 2796, 4369, 0, 2796, 4369, 0, 2796, 4369, 0, 2796, 4369, 0, 3648, 8738, 0, 3648, 8738, 0, 
3648, 8738, 0, 3648, 8738, 0, 4688, 8738, 0, 4688, 8738, 0, 4688, 8738, 0, 4688, 8738, 0, 4704, 8738, 0, 4704, 8738, 0, 4704, 8738, 0, 4704, 8738, 0, 4720, 8738, 0, 4720, 8738, 0, 4720, 8738, 0, 4720, 8738, 0, 5632, 16, 0, 6592, 5, 0, 6592, 5, 0, 6608, 5, 0, 6608, 5, 0, 6624, 5, 0, 6624, 5, 0, 7040, 21760, 0, 7040, 21760, 0, 7040, 21760, 0, 7040, 21760, 0, 7056, 21760, 0, 7056, 21760, 0, 7056, 21760, 0, 7056, 21760, 0, 7072, 21760, 0, 7072, 21760, 0, 7072, 21760, 0, 7072, 21760, 0, 8640, 4096, 0, 11584, 1060, 0, 11584, 1060, 0, 11584, 1060, 0, 12992, 1056, 0, 12992, 1056, 0, 576, 17, 0, 576, 17, 0, 1616, 4369, 0, 1616, 4369, 0, 1616, 4369, 0, 1616, 4369, 0, 1632, 4369, 0, 1632, 4369, 0, 1632, 4369, 0, 1632, 4369, 0, 2772, 4369, 0, 2772, 4369, 0, 2772, 4369, 0, 2772, 4369, 0, 2776, 4369, 0, 2776, 4369, 0, 2776, 4369, 0, 2776, 4369, 0, 2780, 4369, 0, 2780, 4369, 0, 2780, 4369, 0, 2780, 4369, 0, 2788, 4369, 0, 2788, 4369, 0, 2788, 4369, 0, 2788, 4369, 0, 2792, 4369, 0, 2792, 4369, 0, 2792, 4369, 0, 2792, 4369, 0, 2796, 4369, 0, 2796, 4369, 0, 2796, 4369, 0, 2796, 4369, 0, 3648, 8738, 0, 3648, 8738, 0, 3648, 8738, 0, 3648, 8738, 0, 4688, 8738, 0, 4688, 8738, 0, 4688, 8738, 0, 4688, 8738, 0, 4704, 8738, 0, 4704, 8738, 0, 4704, 8738, 0, 4704, 8738, 0, 4720, 8738, 0, 4720, 8738, 0, 4720, 8738, 0, 4720, 8738, 0, 5632, 16, 0, 6592, 5, 0, 6592, 5, 0, 6608, 5, 0, 6608, 5, 0, 6624, 5, 0, 6624, 5, 0, 7040, 21760, 0, 7040, 21760, 0, 7040, 21760, 0, 7040, 21760, 0, 7056, 21760, 0, 7056, 21760, 0, 7056, 21760, 0, 7056, 21760, 0, 7072, 21760, 0, 7072, 21760, 0, 7072, 21760, 0, 7072, 21760, 0, 8640, 4096, 0, 11584, 1060, 0, 11584, 1060, 0, 11584, 1060, 0, 12992, 1056, 0, 12992, 1056, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575296752270942_222_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575296752270942_222_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87cd25f4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575296752270942_222_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 
576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575296852304333_223_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575296852304333_223_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..456bd696 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575296852304333_223_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,349 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 5)) { + result = 
(result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() 
== 11)) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 13))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((269 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((284 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((295 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (307 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (324 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((338 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((352 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (357 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (364 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7232, 256, 0, 19328, 2, 0, 19648, 17476, 0, 19648, 17476, 0, 19648, 17476, 0, 19648, 17476, 0, 20096, 34952, 0, 20096, 34952, 0, 20096, 34952, 0, 20096, 34952, 0, 20736, 17, 0, 20736, 17, 0, 22848, 17476, 0, 22848, 17476, 0, 22848, 17476, 0, 22848, 17476, 0, 23296, 34952, 0, 23296, 34952, 0, 23296, 34952, 0, 23296, 34952, 0, 7232, 256, 0, 19328, 2, 0, 19648, 17476, 0, 19648, 17476, 0, 19648, 17476, 0, 19648, 17476, 0, 20096, 34952, 0, 20096, 34952, 0, 20096, 34952, 0, 20096, 34952, 0, 20736, 
17, 0, 20736, 17, 0, 22848, 17476, 0, 22848, 17476, 0, 22848, 17476, 0, 22848, 17476, 0, 23296, 34952, 0, 23296, 34952, 0, 23296, 34952, 0, 23296, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575302675832457_226_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575302675832457_226_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9452b6a6 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575302675832457_226_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1920, 52428, 0, 1920, 52428, 0, 1920, 52428, 0, 1920, 52428, 0, 1920, 52428, 0, 1920, 52428, 0, 1920, 52428, 0, 1920, 52428, 0, 576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1920, 52428, 0, 1920, 52428, 0, 1920, 52428, 0, 1920, 52428, 0, 1920, 52428, 0, 1920, 52428, 0, 1920, 52428, 0, 1920, 52428, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575302798978120_227_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575302798978120_227_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e6edb431 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575302798978120_227_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,426 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if 
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((161 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 13)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((171 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((178 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 1)) { + 
continue; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((235 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() 
>= 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((264 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((275 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (304 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (327 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (346 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (375 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (392 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (401 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (412 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (421 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 14)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (431 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (438 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 438 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 1984, 32768, 0, 2000, 32768, 0, 4864, 10, 0, 4864, 10, 0, 4880, 10, 0, 4880, 10, 0, 4896, 10, 0, 4896, 10, 0, 5440, 32768, 0, 5456, 32768, 0, 5472, 32768, 0, 5760, 2080, 0, 5760, 2080, 0, 6336, 43690, 0, 6336, 43690, 0, 6336, 43690, 0, 6336, 43690, 0, 6336, 43690, 0, 6336, 43690, 0, 6336, 43690, 0, 6336, 43690, 0, 8000, 4096, 0, 
9216, 128, 0, 9232, 128, 0, 9248, 128, 0, 10304, 128, 0, 10308, 128, 0, 10312, 128, 0, 10320, 128, 0, 10324, 128, 0, 10328, 128, 0, 10336, 128, 0, 10340, 128, 0, 10344, 128, 0, 12160, 128, 0, 12176, 128, 0, 12192, 128, 0, 14016, 32769, 0, 14016, 32769, 0, 15056, 43562, 0, 15056, 43562, 0, 15056, 43562, 0, 15056, 43562, 0, 15056, 43562, 0, 15056, 43562, 0, 15056, 43562, 0, 17616, 17745, 0, 17616, 17745, 0, 17616, 17745, 0, 17616, 17745, 0, 17616, 17745, 0, 17616, 17745, 0, 19456, 64, 0, 20096, 64, 0, 20672, 4416, 0, 20672, 4416, 0, 20672, 4416, 0, 22144, 64, 0, 25664, 33280, 0, 25664, 33280, 0, 26368, 33280, 0, 26368, 33280, 0, 26944, 1040, 0, 26944, 1040, 0, 27584, 16384, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 1984, 32768, 0, 2000, 32768, 0, 4864, 10, 0, 4864, 10, 0, 4880, 10, 0, 4880, 10, 0, 4896, 10, 0, 4896, 10, 0, 5440, 32768, 0, 5456, 32768, 0, 5472, 32768, 0, 5760, 2080, 0, 5760, 2080, 0, 6336, 43690, 0, 6336, 43690, 0, 6336, 43690, 0, 6336, 43690, 0, 6336, 43690, 0, 6336, 43690, 0, 6336, 43690, 0, 6336, 43690, 0, 8000, 4096, 0, 9216, 128, 0, 9232, 128, 0, 9248, 128, 0, 10304, 128, 0, 10308, 128, 0, 10312, 128, 0, 10320, 128, 0, 10324, 128, 0, 10328, 128, 0, 10336, 128, 0, 10340, 128, 0, 10344, 128, 0, 12160, 128, 0, 12176, 128, 0, 12192, 128, 0, 14016, 32769, 0, 14016, 32769, 0, 15056, 43562, 0, 15056, 43562, 0, 15056, 43562, 0, 15056, 43562, 0, 15056, 43562, 0, 15056, 43562, 0, 15056, 43562, 0, 17616, 17745, 0, 17616, 17745, 0, 17616, 17745, 0, 17616, 17745, 0, 17616, 17745, 0, 17616, 17745, 0, 19456, 64, 0, 20096, 64, 0, 20672, 4416, 0, 20672, 4416, 0, 20672, 4416, 0, 22144, 64, 0, 25664, 33280, 0, 25664, 33280, 0, 26368, 33280, 0, 26368, 33280, 0, 26944, 1040, 0, 26944, 1040, 0, 27584, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575332303779966_229_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575332303779966_229_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7051e857 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575332303779966_229_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,233 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 0)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 3: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2560, 16, 0, 2560, 16, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575332439130746_230_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575332439130746_230_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..235127d2 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575332439130746_230_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,260 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + } else { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (i1 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2320, 1, 0, 2336, 1, 0, 3024, 20, 0, 3024, 20, 0, 3040, 20, 0, 3040, 20, 0, 3664, 20, 0, 3664, 20, 0, 3680, 20, 0, 3680, 20, 0, 5568, 32768, 0, 8768, 8322, 0, 8768, 8322, 0, 8768, 8322, 0, 11136, 2080, 0, 11136, 2080, 0, 11584, 2048, 0, 12224, 85, 0, 12224, 85, 0, 12224, 85, 0, 12224, 85, 0, 13504, 32, 0, 13520, 32, 0, 2320, 1, 0, 2336, 1, 0, 3024, 20, 0, 3024, 20, 0, 3040, 20, 0, 3040, 20, 0, 3664, 20, 0, 3664, 20, 0, 3680, 20, 0, 3680, 20, 0, 5568, 32768, 0, 8768, 8322, 0, 8768, 8322, 0, 8768, 8322, 0, 11136, 2080, 0, 11136, 2080, 0, 11584, 2048, 0, 12224, 85, 0, 12224, 85, 0, 12224, 85, 0, 12224, 85, 0, 13504, 32, 0, 13520, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575337915309689_232_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575337915309689_232_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..db491b08 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575337915309689_232_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,205 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((77 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((93 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((102 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 582 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2688, 5120, 0, 2688, 5120, 0, 2704, 5120, 0, 2704, 5120, 0, 3776, 21845, 0, 3776, 21845, 0, 3776, 21845, 0, 3776, 21845, 0, 3776, 21845, 0, 3776, 21845, 0, 3776, 21845, 0, 3776, 21845, 0, 3780, 21845, 0, 3780, 21845, 0, 3780, 21845, 0, 3780, 21845, 0, 3780, 21845, 0, 3780, 21845, 0, 3780, 21845, 0, 3780, 21845, 0, 3792, 21845, 0, 3792, 21845, 0, 3792, 21845, 0, 3792, 21845, 0, 3792, 21845, 0, 3792, 21845, 0, 3792, 21845, 0, 3792, 21845, 0, 3796, 21845, 0, 3796, 21845, 0, 3796, 21845, 0, 3796, 21845, 0, 3796, 21845, 0, 3796, 21845, 0, 3796, 21845, 0, 3796, 21845, 0, 4928, 20481, 0, 4928, 20481, 0, 4928, 20481, 0, 4932, 20481, 0, 4932, 20481, 0, 4932, 20481, 0, 4944, 20481, 0, 4944, 20481, 0, 4944, 20481, 0, 4948, 20481, 0, 4948, 20481, 0, 4948, 20481, 0, 6528, 21845, 0, 6528, 21845, 0, 6528, 21845, 0, 6528, 21845, 0, 6528, 21845, 0, 6528, 21845, 0, 6528, 21845, 0, 6528, 21845, 0, 6532, 21845, 0, 6532, 21845, 0, 6532, 21845, 0, 6532, 21845, 0, 6532, 21845, 0, 6532, 21845, 0, 6532, 21845, 0, 6532, 21845, 0, 6544, 21845, 0, 6544, 21845, 0, 6544, 21845, 0, 6544, 21845, 0, 6544, 21845, 0, 6544, 21845, 0, 6544, 21845, 0, 6544, 21845, 0, 6548, 21845, 0, 6548, 21845, 0, 6548, 21845, 0, 6548, 21845, 0, 6548, 21845, 0, 6548, 21845, 0, 6548, 21845, 0, 6548, 21845, 0, 7744, 256, 0, 7760, 256, 0, 8960, 73, 0, 8960, 73, 0, 8960, 73, 0, 9536, 1040, 0, 9536, 1040, 0, 9856, 28086, 0, 9856, 28086, 0, 9856, 28086, 0, 9856, 28086, 0, 9856, 28086, 0, 9856, 28086, 0, 9856, 28086, 0, 9856, 28086, 0, 9856, 28086, 0, 9856, 28086, 0, 2688, 5120, 0, 2688, 5120, 0, 2704, 5120, 0, 2704, 5120, 0, 3776, 21845, 0, 3776, 21845, 0, 3776, 21845, 0, 3776, 21845, 0, 3776, 21845, 0, 3776, 21845, 0, 3776, 21845, 0, 3776, 21845, 0, 3780, 21845, 0, 3780, 21845, 0, 3780, 21845, 0, 3780, 21845, 0, 
3780, 21845, 0, 3780, 21845, 0, 3780, 21845, 0, 3780, 21845, 0, 3792, 21845, 0, 3792, 21845, 0, 3792, 21845, 0, 3792, 21845, 0, 3792, 21845, 0, 3792, 21845, 0, 3792, 21845, 0, 3792, 21845, 0, 3796, 21845, 0, 3796, 21845, 0, 3796, 21845, 0, 3796, 21845, 0, 3796, 21845, 0, 3796, 21845, 0, 3796, 21845, 0, 3796, 21845, 0, 4928, 20481, 0, 4928, 20481, 0, 4928, 20481, 0, 4932, 20481, 0, 4932, 20481, 0, 4932, 20481, 0, 4944, 20481, 0, 4944, 20481, 0, 4944, 20481, 0, 4948, 20481, 0, 4948, 20481, 0, 4948, 20481, 0, 6528, 21845, 0, 6528, 21845, 0, 6528, 21845, 0, 6528, 21845, 0, 6528, 21845, 0, 6528, 21845, 0, 6528, 21845, 0, 6528, 21845, 0, 6532, 21845, 0, 6532, 21845, 0, 6532, 21845, 0, 6532, 21845, 0, 6532, 21845, 0, 6532, 21845, 0, 6532, 21845, 0, 6532, 21845, 0, 6544, 21845, 0, 6544, 21845, 0, 6544, 21845, 0, 6544, 21845, 0, 6544, 21845, 0, 6544, 21845, 0, 6544, 21845, 0, 6544, 21845, 0, 6548, 21845, 0, 6548, 21845, 0, 6548, 21845, 0, 6548, 21845, 0, 6548, 21845, 0, 6548, 21845, 0, 6548, 21845, 0, 6548, 21845, 0, 7744, 256, 0, 7760, 256, 0, 8960, 73, 0, 8960, 73, 0, 8960, 73, 0, 9536, 1040, 0, 9536, 1040, 0, 9856, 28086, 0, 9856, 28086, 0, 9856, 28086, 0, 9856, 28086, 0, 9856, 28086, 0, 9856, 28086, 0, 9856, 28086, 0, 9856, 28086, 0, 9856, 28086, 0, 9856, 28086, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575344471870959_234_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575344471870959_234_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1f21cb15 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575344471870959_234_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,94 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - 
Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575344589133746_235_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575344589133746_235_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a794429e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575344589133746_235_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,485 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((175 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10)) || 
(WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (286 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (315 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (327 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (344 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (385 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((423 << 6) | (counter4 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 5))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((465 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((474 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((499 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (518 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch 
((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((542 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((549 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter6 == 2)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (561 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 228 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1408, 1, 0, 15872, 32, 0, 18304, 1024, 0, 20160, 1024, 0, 20480, 1024, 0, 22464, 34952, 0, 22464, 34952, 0, 22464, 34952, 0, 22464, 34952, 0, 24640, 4, 0, 34708, 21760, 0, 34708, 21760, 0, 34708, 21760, 0, 34708, 21760, 0, 34712, 21760, 0, 34712, 21760, 0, 34712, 21760, 0, 34712, 21760, 0, 34724, 21760, 0, 34724, 21760, 0, 34724, 21760, 0, 34724, 21760, 0, 34728, 21760, 0, 34728, 21760, 0, 34728, 21760, 0, 
34728, 21760, 0, 35156, 21504, 0, 35156, 21504, 0, 35156, 21504, 0, 35160, 21504, 0, 35160, 21504, 0, 35160, 21504, 0, 35172, 21504, 0, 35172, 21504, 0, 35172, 21504, 0, 35176, 21504, 0, 35176, 21504, 0, 35176, 21504, 0, 1408, 1, 0, 15872, 32, 0, 18304, 1024, 0, 20160, 1024, 0, 20480, 1024, 0, 22464, 34952, 0, 22464, 34952, 0, 22464, 34952, 0, 22464, 34952, 0, 24640, 4, 0, 34708, 21760, 0, 34708, 21760, 0, 34708, 21760, 0, 34708, 21760, 0, 34712, 21760, 0, 34712, 21760, 0, 34712, 21760, 0, 34712, 21760, 0, 34724, 21760, 0, 34724, 21760, 0, 34724, 21760, 0, 34724, 21760, 0, 34728, 21760, 0, 34728, 21760, 0, 34728, 21760, 0, 34728, 21760, 0, 35156, 21504, 0, 35156, 21504, 0, 35156, 21504, 0, 35160, 21504, 0, 35160, 21504, 0, 35160, 21504, 0, 35172, 21504, 0, 35172, 21504, 0, 35172, 21504, 0, 35176, 21504, 0, 35176, 21504, 0, 35176, 21504, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575377318678519_238_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575377318678519_238_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0ce94bf7 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575377318678519_238_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,119 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 14)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2000, 32768, 0, 2016, 32768, 0, 2624, 85, 0, 2624, 85, 0, 2624, 85, 0, 2624, 85, 0, 3648, 10922, 0, 3648, 10922, 0, 3648, 10922, 0, 3648, 10922, 0, 3648, 10922, 0, 3648, 10922, 0, 3648, 10922, 0, 4224, 10922, 0, 4224, 10922, 0, 4224, 10922, 0, 4224, 10922, 0, 4224, 10922, 0, 4224, 10922, 0, 4224, 10922, 0, 2000, 32768, 0, 2016, 32768, 0, 2624, 85, 0, 2624, 85, 0, 2624, 85, 0, 2624, 85, 0, 3648, 10922, 0, 3648, 10922, 0, 3648, 10922, 0, 3648, 10922, 0, 3648, 10922, 0, 3648, 10922, 0, 3648, 10922, 0, 4224, 10922, 0, 4224, 10922, 0, 4224, 10922, 0, 4224, 10922, 0, 4224, 10922, 0, 4224, 10922, 0, 4224, 10922, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + 
Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575458371945295_241_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575458371945295_241_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d3b804a9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575458371945295_241_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,202 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 2) || 
(WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3520, 4096, 0, 4160, 16, 0, 5056, 4, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3520, 4096, 0, 4160, 16, 0, 5056, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + 
Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575458577081490_242_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575458577081490_242_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a794edc9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575458577081490_242_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,260 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if 
(((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((183 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || 
(WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((198 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1664, 32769, 0, 1664, 32769, 0, 2240, 4160, 0, 2240, 4160, 0, 13632, 18724, 0, 13632, 18724, 0, 13632, 18724, 0, 13632, 18724, 0, 13632, 18724, 0, 1664, 32769, 0, 1664, 32769, 0, 2240, 4160, 0, 2240, 4160, 0, 13632, 18724, 0, 13632, 18724, 0, 13632, 18724, 0, 13632, 18724, 0, 13632, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - 
Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575458937795538_243_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575458937795538_243_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..158a3f57 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575458937795538_243_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1728, 57375, 0, 1728, 57375, 0, 1728, 57375, 0, 1728, 57375, 0, 1728, 57375, 
0, 1728, 57375, 0, 1728, 57375, 0, 1728, 57375, 0, 1472, 4224, 0, 1472, 4224, 0, 1728, 57375, 0, 1728, 57375, 0, 1728, 57375, 0, 1728, 57375, 0, 1728, 57375, 0, 1728, 57375, 0, 1728, 57375, 0, 1728, 57375, 0, 1472, 4224, 0, 1472, 4224, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575459031669354_244_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575459031669354_244_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e30bc7fc --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575459031669354_244_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,285 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 
<< 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((176 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + 
if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 612 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 5072, 8, 0, 5088, 8, 0, 9344, 4096, 0, 9360, 4096, 0, 9376, 4096, 0, 10112, 3, 0, 10112, 3, 0, 10128, 3, 0, 10128, 3, 0, 10144, 3, 0, 10144, 3, 0, 11268, 23, 0, 11268, 23, 0, 11268, 23, 0, 11268, 23, 0, 11272, 23, 0, 11272, 23, 0, 11272, 23, 0, 11272, 23, 0, 11284, 23, 0, 11284, 23, 0, 11284, 23, 0, 11284, 23, 0, 11288, 23, 0, 11288, 23, 0, 11288, 23, 0, 11288, 23, 0, 11300, 23, 0, 11300, 23, 0, 11300, 23, 0, 11300, 23, 0, 11304, 23, 0, 11304, 23, 0, 11304, 23, 0, 11304, 23, 0, 11712, 32256, 0, 11712, 32256, 0, 11712, 32256, 0, 11712, 32256, 0, 11712, 32256, 0, 11712, 32256, 0, 11728, 32256, 0, 11728, 32256, 0, 11728, 32256, 0, 11728, 32256, 0, 11728, 32256, 0, 11728, 32256, 0, 11744, 32256, 0, 11744, 32256, 0, 11744, 32256, 0, 11744, 32256, 0, 11744, 32256, 0, 11744, 32256, 0, 12928, 1024, 0, 12944, 1024, 0, 12960, 1024, 0, 13248, 18688, 0, 13248, 18688, 0, 13248, 18688, 0, 13264, 18688, 
0, 13264, 18688, 0, 13264, 18688, 0, 13280, 18688, 0, 13280, 18688, 0, 13280, 18688, 0, 13696, 32512, 0, 13696, 32512, 0, 13696, 32512, 0, 13696, 32512, 0, 13696, 32512, 0, 13696, 32512, 0, 13696, 32512, 0, 13712, 32512, 0, 13712, 32512, 0, 13712, 32512, 0, 13712, 32512, 0, 13712, 32512, 0, 13712, 32512, 0, 13712, 32512, 0, 13728, 32512, 0, 13728, 32512, 0, 13728, 32512, 0, 13728, 32512, 0, 13728, 32512, 0, 13728, 32512, 0, 13728, 32512, 0, 14144, 32256, 0, 14144, 32256, 0, 14144, 32256, 0, 14144, 32256, 0, 14144, 32256, 0, 14144, 32256, 0, 576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 5072, 8, 0, 5088, 8, 0, 9344, 4096, 0, 9360, 4096, 0, 9376, 4096, 0, 10112, 3, 0, 10112, 3, 0, 10128, 3, 0, 10128, 3, 0, 10144, 3, 0, 10144, 3, 0, 11268, 23, 0, 11268, 23, 0, 11268, 23, 0, 11268, 23, 0, 11272, 23, 0, 11272, 23, 0, 11272, 23, 0, 11272, 23, 0, 11284, 23, 0, 11284, 23, 0, 11284, 23, 0, 11284, 23, 0, 11288, 23, 0, 11288, 23, 0, 11288, 23, 0, 11288, 23, 0, 11300, 23, 0, 11300, 23, 0, 11300, 23, 0, 11300, 23, 0, 11304, 23, 0, 11304, 23, 0, 11304, 23, 0, 11304, 23, 0, 11712, 32256, 0, 11712, 32256, 0, 11712, 32256, 0, 11712, 32256, 0, 11712, 32256, 0, 11712, 32256, 0, 11728, 32256, 0, 11728, 32256, 0, 11728, 32256, 0, 11728, 32256, 0, 11728, 32256, 0, 11728, 32256, 0, 11744, 32256, 0, 11744, 32256, 0, 11744, 32256, 0, 11744, 32256, 0, 11744, 32256, 0, 11744, 32256, 0, 12928, 1024, 0, 12944, 1024, 0, 12960, 1024, 0, 13248, 18688, 0, 13248, 18688, 0, 13248, 18688, 0, 13264, 18688, 0, 13264, 18688, 0, 13264, 18688, 0, 13280, 18688, 0, 13280, 18688, 0, 13280, 18688, 0, 13696, 32512, 0, 13696, 32512, 0, 13696, 32512, 0, 13696, 32512, 0, 13696, 32512, 0, 13696, 32512, 0, 13696, 32512, 0, 13712, 32512, 0, 13712, 32512, 0, 13712, 32512, 0, 13712, 32512, 0, 13712, 32512, 0, 13712, 32512, 0, 13712, 32512, 0, 13728, 32512, 0, 13728, 32512, 0, 13728, 32512, 0, 13728, 32512, 0, 
13728, 32512, 0, 13728, 32512, 0, 13728, 32512, 0, 14144, 32256, 0, 14144, 32256, 0, 14144, 32256, 0, 14144, 32256, 0, 14144, 32256, 0, 14144, 32256, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575478545653864_247_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575478545653864_247_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8bada4fc --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575478545653864_247_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,185 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((125 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1984, 16, 0, 4864, 8322, 0, 4864, 8322, 0, 4864, 8322, 0, 4880, 8322, 0, 4880, 8322, 0, 4880, 8322, 0, 6592, 2050, 0, 6592, 2050, 0, 6608, 2050, 0, 6608, 2050, 0, 8004, 8452, 0, 8004, 8452, 0, 8004, 8452, 0, 8008, 8452, 0, 8008, 8452, 0, 8008, 8452, 0, 8012, 8452, 0, 8012, 8452, 0, 8012, 8452, 0, 8020, 8452, 0, 8020, 8452, 0, 8020, 8452, 0, 8024, 8452, 0, 8024, 8452, 0, 8024, 8452, 0, 8028, 8452, 0, 8028, 8452, 0, 8028, 
8452, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1984, 16, 0, 4864, 8322, 0, 4864, 8322, 0, 4864, 8322, 0, 4880, 8322, 0, 4880, 8322, 0, 4880, 8322, 0, 6592, 2050, 0, 6592, 2050, 0, 6608, 2050, 0, 6608, 2050, 0, 8004, 8452, 0, 8004, 8452, 0, 8004, 8452, 0, 8008, 8452, 0, 8008, 8452, 0, 8008, 8452, 0, 8012, 8452, 0, 8012, 8452, 0, 8012, 8452, 0, 8020, 8452, 0, 8020, 8452, 0, 8020, 8452, 0, 8024, 8452, 0, 8024, 8452, 0, 8024, 8452, 0, 8028, 8452, 0, 8028, 8452, 0, 8028, 8452, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575480207837694_248_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575480207837694_248_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d79fc744 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575480207837694_248_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,167 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 11)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + 
WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 2)) { + break; + } + } + if ((i3 == 1)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3728, 8704, 0, 3728, 8704, 0, 3744, 8704, 0, 3744, 8704, 0, 5968, 32, 0, 5984, 32, 0, 7872, 16384, 0, 7876, 16384, 0, 7880, 16384, 0, 7888, 16384, 0, 7892, 16384, 0, 7896, 16384, 0, 8704, 34952, 0, 8704, 34952, 0, 8704, 34952, 0, 8704, 34952, 0, 3728, 8704, 0, 3728, 8704, 0, 3744, 8704, 0, 3744, 8704, 0, 5968, 32, 0, 5984, 32, 0, 7872, 16384, 0, 7876, 16384, 0, 7880, 16384, 0, 7888, 16384, 0, 7892, 16384, 0, 7896, 16384, 0, 8704, 34952, 0, 8704, 34952, 0, 
8704, 34952, 0, 8704, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575781505365710_250_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575781505365710_250_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5d025bbb --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575781505365710_250_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,179 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 
6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 
<< 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 300 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 37376, 0, 1040, 37376, 0, 1040, 37376, 0, 1056, 37376, 0, 1056, 37376, 0, 1056, 37376, 0, 1072, 37376, 0, 1072, 37376, 0, 
1072, 37376, 0, 1808, 32768, 0, 1824, 32768, 0, 1840, 32768, 0, 5072, 36864, 0, 5072, 36864, 0, 5088, 36864, 0, 5088, 36864, 0, 5104, 36864, 0, 5104, 36864, 0, 7040, 5201, 0, 7040, 5201, 0, 7040, 5201, 0, 7040, 5201, 0, 7040, 5201, 0, 7056, 5201, 0, 7056, 5201, 0, 7056, 5201, 0, 7056, 5201, 0, 7056, 5201, 0, 8320, 40962, 0, 8320, 40962, 0, 8320, 40962, 0, 9472, 40962, 0, 9472, 40962, 0, 9472, 40962, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 1040, 37376, 0, 1040, 37376, 0, 1040, 37376, 0, 1056, 37376, 0, 1056, 37376, 0, 1056, 37376, 0, 1072, 37376, 0, 1072, 37376, 0, 1072, 37376, 0, 1808, 32768, 0, 1824, 32768, 0, 1840, 32768, 0, 5072, 36864, 0, 5072, 36864, 0, 5088, 36864, 0, 5088, 36864, 0, 5104, 36864, 0, 5104, 36864, 0, 7040, 5201, 0, 7040, 5201, 0, 7040, 5201, 0, 7040, 5201, 0, 7040, 5201, 0, 7056, 5201, 0, 7056, 5201, 0, 7056, 5201, 0, 7056, 5201, 0, 7056, 5201, 0, 8320, 40962, 0, 8320, 40962, 0, 8320, 40962, 0, 9472, 40962, 0, 9472, 40962, 0, 9472, 40962, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0, 9792, 65535, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575782867228303_251_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575782867228303_251_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..891d8c75 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575782867228303_251_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 61471, 0, 1600, 61471, 0, 1600, 61471, 0, 1600, 61471, 0, 1600, 61471, 0, 1600, 61471, 0, 1600, 61471, 0, 1600, 61471, 0, 1600, 61471, 0, 1216, 2048, 0, 1600, 61471, 0, 1600, 61471, 0, 1600, 61471, 0, 1600, 
61471, 0, 1600, 61471, 0, 1600, 61471, 0, 1600, 61471, 0, 1600, 61471, 0, 1600, 61471, 0, 1216, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575858324574610_255_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575858324574610_255_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e6e94eab --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575858324574610_255_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,148 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((100 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 282 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 257, 0, 1600, 257, 0, 1616, 257, 0, 1616, 257, 0, 1632, 257, 0, 1632, 257, 0, 4224, 64, 0, 4240, 64, 0, 4256, 64, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 6416, 32778, 0, 6416, 32778, 0, 6416, 32778, 0, 6432, 32778, 0, 6432, 32778, 0, 6432, 32778, 0, 7120, 43690, 0, 7120, 43690, 0, 7120, 43690, 0, 7120, 43690, 0, 7120, 43690, 0, 7120, 43690, 0, 7120, 43690, 0, 7120, 43690, 0, 7136, 43690, 0, 7136, 43690, 0, 7136, 43690, 0, 7136, 43690, 0, 7136, 43690, 0, 7136, 43690, 0, 7136, 43690, 0, 7136, 43690, 0, 1600, 257, 0, 1600, 257, 0, 1616, 257, 0, 1616, 257, 0, 1632, 257, 0, 1632, 257, 0, 4224, 64, 0, 4240, 64, 0, 4256, 64, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 6416, 32778, 0, 6416, 32778, 0, 
6416, 32778, 0, 6432, 32778, 0, 6432, 32778, 0, 6432, 32778, 0, 7120, 43690, 0, 7120, 43690, 0, 7120, 43690, 0, 7120, 43690, 0, 7120, 43690, 0, 7120, 43690, 0, 7120, 43690, 0, 7120, 43690, 0, 7136, 43690, 0, 7136, 43690, 0, 7136, 43690, 0, 7136, 43690, 0, 7136, 43690, 0, 7136, 43690, 0, 7136, 43690, 0, 7136, 43690, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575859409717185_256_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575859409717185_256_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..108ee000 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575859409717185_256_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,177 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 4) || 
(WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 4))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 8192, 0, 4928, 1, 0, 7296, 64, 0, 8464, 55298, 0, 8464, 55298, 0, 8464, 55298, 0, 8464, 55298, 0, 8464, 55298, 0, 8480, 55298, 
0, 8480, 55298, 0, 8480, 55298, 0, 8480, 55298, 0, 8480, 55298, 0, 9152, 32768, 0, 1344, 8192, 0, 4928, 1, 0, 7296, 64, 0, 8464, 55298, 0, 8464, 55298, 0, 8464, 55298, 0, 8464, 55298, 0, 8464, 55298, 0, 8480, 55298, 0, 8480, 55298, 0, 8480, 55298, 0, 8480, 55298, 0, 8480, 55298, 0, 9152, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575859942970236_257_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575859942970236_257_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..efb287ad --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575859942970236_257_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,188 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(2)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if 
((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + 
result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3840, 512, 0, 3584, 32772, 0, 3584, 32772, 0, 3328, 6168, 0, 3328, 6168, 0, 3328, 6168, 0, 3328, 6168, 0, 2944, 24579, 0, 2944, 24579, 0, 2944, 24579, 0, 2944, 24579, 0, 6208, 8, 0, 11520, 5201, 0, 11520, 5201, 0, 11520, 5201, 0, 11520, 5201, 0, 11520, 5201, 0, 11840, 18724, 0, 11840, 18724, 0, 11840, 18724, 0, 11840, 18724, 0, 11840, 18724, 0, 3840, 512, 0, 3584, 32772, 0, 3584, 32772, 0, 3328, 6168, 0, 3328, 6168, 0, 3328, 6168, 0, 3328, 6168, 0, 2944, 24579, 0, 2944, 24579, 0, 2944, 24579, 0, 2944, 24579, 0, 6208, 8, 0, 11520, 5201, 0, 11520, 5201, 0, 11520, 5201, 0, 11520, 5201, 0, 11520, 5201, 0, 11840, 18724, 0, 11840, 18724, 0, 11840, 18724, 0, 11840, 18724, 0, 11840, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575860340496531_258_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575860340496531_258_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f86ad36f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575860340496531_258_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,301 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 11)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() 
>= 11))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + 
WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6912, 2048, 0, 8384, 8322, 0, 8384, 8322, 0, 8384, 8322, 0, 13248, 1026, 0, 13248, 1026, 0, 13248, 32768, 0, 6912, 2048, 0, 8384, 8322, 0, 8384, 8322, 0, 8384, 8322, 0, 13248, 1026, 0, 13248, 1026, 0, 13248, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575860503695877_259_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575860503695877_259_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b3106ca0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575860503695877_259_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,123 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 
6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4304, 16384, 0, 4320, 16384, 0, 4304, 16384, 0, 4320, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575860620164851_260_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575860620164851_260_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5621a869 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575860620164851_260_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,422 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((115 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || 
(WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 6)) { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((189 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((200 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((255 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || 
(WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((288 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 3)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((306 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter8 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((316 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (331 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if 
(true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (336 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (354 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter9 = 0; + while ((counter9 < 3)) { + counter9 = (counter9 + 1); + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((376 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((395 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (418 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter10 = 0; + while ((counter10 < 2)) { + counter10 = (counter10 + 1); + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((435 << 6) | (counter10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((444 << 6) | (counter10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((453 << 6) | (counter10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter10 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (475 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 348 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4032, 16, 0, 4048, 16, 0, 
4064, 16, 0, 9600, 17476, 0, 9600, 17476, 0, 9600, 17476, 0, 9600, 17476, 0, 10048, 34952, 0, 10048, 34952, 0, 10048, 34952, 0, 10048, 34952, 0, 13824, 33288, 0, 13824, 33288, 0, 13824, 33288, 0, 13840, 33288, 0, 13840, 33288, 0, 13840, 33288, 0, 13856, 33288, 0, 13856, 33288, 0, 13856, 33288, 0, 16320, 2, 0, 16336, 2, 0, 16352, 2, 0, 17536, 16, 0, 21184, 16, 0, 21504, 18724, 0, 21504, 18724, 0, 21504, 18724, 0, 21504, 18724, 0, 21504, 18724, 0, 22656, 57351, 0, 22656, 57351, 0, 22656, 57351, 0, 22656, 57351, 0, 22656, 57351, 0, 22656, 57351, 0, 24080, 49152, 0, 24080, 49152, 0, 24096, 49152, 0, 24096, 49152, 0, 24112, 49152, 0, 24112, 49152, 0, 25296, 2048, 0, 25312, 2048, 0, 25328, 2048, 0, 26752, 152, 0, 26752, 152, 0, 26752, 152, 0, 28432, 680, 0, 28432, 680, 0, 28432, 680, 0, 28432, 680, 0, 29008, 680, 0, 29008, 680, 0, 29008, 680, 0, 29008, 680, 0, 30400, 320, 0, 30400, 320, 0, 4032, 16, 0, 4048, 16, 0, 4064, 16, 0, 9600, 17476, 0, 9600, 17476, 0, 9600, 17476, 0, 9600, 17476, 0, 10048, 34952, 0, 10048, 34952, 0, 10048, 34952, 0, 10048, 34952, 0, 13824, 33288, 0, 13824, 33288, 0, 13824, 33288, 0, 13840, 33288, 0, 13840, 33288, 0, 13840, 33288, 0, 13856, 33288, 0, 13856, 33288, 0, 13856, 33288, 0, 16320, 2, 0, 16336, 2, 0, 16352, 2, 0, 17536, 16, 0, 21184, 16, 0, 21504, 18724, 0, 21504, 18724, 0, 21504, 18724, 0, 21504, 18724, 0, 21504, 18724, 0, 22656, 57351, 0, 22656, 57351, 0, 22656, 57351, 0, 22656, 57351, 0, 22656, 57351, 0, 22656, 57351, 0, 24080, 49152, 0, 24080, 49152, 0, 24096, 49152, 0, 24096, 49152, 0, 24112, 49152, 0, 24112, 49152, 0, 25296, 2048, 0, 25312, 2048, 0, 25328, 2048, 0, 26752, 152, 0, 26752, 152, 0, 26752, 152, 0, 28432, 680, 0, 28432, 680, 0, 28432, 680, 0, 28432, 680, 0, 29008, 680, 0, 29008, 680, 0, 29008, 680, 0, 29008, 680, 0, 30400, 320, 0, 30400, 320, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575910538016910_262_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575910538016910_262_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..69bd5b10 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575910538016910_262_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,345 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = 
(counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) 
|| (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((249 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((258 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 7)) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 5))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (339 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (346 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((377 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((395 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((410 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (414 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4096, 8192, 0, 4800, 2, 0, 7568, 512, 0, 7584, 512, 0, 8064, 17476, 0, 8064, 17476, 0, 8064, 17476, 0, 8064, 17476, 0, 17728, 8, 0, 26240, 2048, 0, 26256, 2048, 0, 26272, 2048, 0, 576, 17, 0, 576, 17, 0, 4096, 8192, 0, 4800, 2, 0, 7568, 512, 0, 7584, 512, 0, 8064, 17476, 0, 8064, 17476, 0, 8064, 17476, 0, 8064, 17476, 0, 17728, 8, 0, 26240, 2048, 0, 26256, 2048, 0, 26272, 2048, 0] + - Name: _wave_op_index + Format: 
UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575913113934757_263_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575913113934757_263_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..aa530a2e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575913113934757_263_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,194 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + 
if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 5))) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((130 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((145 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 2048, 16644, 0, 2048, 16644, 0, 2048, 16644, 0, 4032, 16644, 0, 4032, 16644, 0, 4032, 16644, 0, 6928, 32, 0, 6944, 32, 0, 9300, 32, 0, 9304, 32, 0, 9316, 32, 0, 9320, 32, 0, 10512, 32, 0, 10528, 32, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 2048, 16644, 0, 2048, 16644, 0, 2048, 16644, 0, 4032, 16644, 0, 4032, 16644, 0, 4032, 16644, 0, 6928, 32, 0, 6944, 32, 0, 9300, 32, 0, 9304, 32, 0, 9316, 32, 0, 9320, 32, 0, 10512, 32, 0, 10528, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575913419057500_264_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575913419057500_264_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..335e82cd --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575913419057500_264_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,273 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; 
+ } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 5)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4864, 17476, 0, 4864, 17476, 0, 4864, 17476, 0, 4864, 17476, 0, 5504, 8, 0, 12864, 2048, 0, 4864, 17476, 0, 4864, 17476, 0, 4864, 17476, 0, 4864, 17476, 0, 5504, 8, 0, 12864, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - 
Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575913603720735_265_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575913603720735_265_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..927c027d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575913603720735_265_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,235 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } + } + case 1: { + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 0)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + 
result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (counter2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [9216, 52428, 0, 9216, 52428, 0, 9216, 52428, 0, 9216, 52428, 0, 9216, 52428, 0, 9216, 52428, 0, 9216, 52428, 0, 9216, 52428, 0, 13776, 16520, 0, 13776, 16520, 0, 13776, 16520, 0, 13792, 16520, 0, 13792, 16520, 0, 13792, 16520, 0, 13808, 16520, 0, 13808, 16520, 0, 13808, 16520, 0, 9216, 52428, 0, 9216, 52428, 0, 9216, 52428, 0, 9216, 52428, 0, 9216, 52428, 0, 9216, 52428, 0, 9216, 52428, 0, 9216, 52428, 0, 13776, 16520, 0, 13776, 16520, 0, 13776, 16520, 0, 13792, 16520, 0, 13792, 16520, 0, 13792, 16520, 0, 13808, 16520, 0, 13808, 16520, 0, 13808, 16520, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit 
+ Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575978541532517_268_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575978541532517_268_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ccd8baf9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575978541532517_268_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,345 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 13)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((231 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((265 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((272 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((283 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((290 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (294 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 468 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2560, 546, 0, 2560, 546, 0, 2560, 546, 0, 2576, 546, 0, 2576, 546, 0, 2576, 546, 0, 2592, 546, 0, 2592, 546, 0, 2592, 546, 0, 4992, 17476, 0, 4992, 17476, 0, 4992, 17476, 0, 4992, 17476, 0, 5440, 34952, 0, 5440, 34952, 0, 5440, 34952, 0, 5440, 34952, 0, 6336, 17, 0, 6336, 17, 0, 14784, 34824, 0, 14784, 34824, 0, 14784, 34824, 0, 14788, 34824, 0, 14788, 34824, 0, 14788, 34824, 0, 14792, 34824, 0, 14792, 34824, 0, 14792, 34824, 0, 14800, 34824, 0, 14800, 34824, 0, 14800, 34824, 0, 14804, 34824, 0, 14804, 34824, 0, 14804, 34824, 0, 14808, 34824, 0, 14808, 34824, 0, 14808, 34824, 0, 14816, 34824, 0, 14816, 34824, 0, 14816, 34824, 0, 14820, 34824, 0, 14820, 34824, 0, 14820, 34824, 0, 14824, 34824, 0, 14824, 34824, 0, 14824, 34824, 0, 18112, 34824, 0, 18112, 34824, 0, 18112, 34824, 0, 18116, 34824, 0, 18116, 34824, 0, 18116, 34824, 0, 18120, 34824, 0, 18120, 34824, 0, 18120, 34824, 0, 18128, 34824, 0, 18128, 34824, 0, 18128, 34824, 0, 18132, 34824, 0, 18132, 34824, 0, 18132, 34824, 0, 18136, 34824, 0, 18136, 34824, 0, 18136, 34824, 0, 18144, 34824, 0, 18144, 34824, 0, 18144, 34824, 0, 18148, 34824, 0, 18148, 34824, 0, 18148, 34824, 0, 18152, 34824, 0, 18152, 34824, 0, 18152, 34824, 0, 18560, 32768, 0, 18576, 32768, 0, 18592, 32768, 0, 576, 17, 0, 576, 17, 0, 2560, 546, 0, 2560, 546, 0, 2560, 546, 0, 2576, 546, 0, 2576, 546, 0, 2576, 546, 0, 2592, 546, 0, 2592, 546, 0, 2592, 546, 0, 4992, 17476, 0, 4992, 17476, 0, 4992, 17476, 0, 4992, 17476, 0, 5440, 34952, 0, 5440, 34952, 0, 5440, 34952, 0, 5440, 34952, 0, 6336, 17, 0, 6336, 17, 0, 14784, 34824, 0, 14784, 34824, 0, 14784, 34824, 0, 14788, 34824, 0, 14788, 
34824, 0, 14788, 34824, 0, 14792, 34824, 0, 14792, 34824, 0, 14792, 34824, 0, 14800, 34824, 0, 14800, 34824, 0, 14800, 34824, 0, 14804, 34824, 0, 14804, 34824, 0, 14804, 34824, 0, 14808, 34824, 0, 14808, 34824, 0, 14808, 34824, 0, 14816, 34824, 0, 14816, 34824, 0, 14816, 34824, 0, 14820, 34824, 0, 14820, 34824, 0, 14820, 34824, 0, 14824, 34824, 0, 14824, 34824, 0, 14824, 34824, 0, 18112, 34824, 0, 18112, 34824, 0, 18112, 34824, 0, 18116, 34824, 0, 18116, 34824, 0, 18116, 34824, 0, 18120, 34824, 0, 18120, 34824, 0, 18120, 34824, 0, 18128, 34824, 0, 18128, 34824, 0, 18128, 34824, 0, 18132, 34824, 0, 18132, 34824, 0, 18132, 34824, 0, 18136, 34824, 0, 18136, 34824, 0, 18136, 34824, 0, 18144, 34824, 0, 18144, 34824, 0, 18144, 34824, 0, 18148, 34824, 0, 18148, 34824, 0, 18148, 34824, 0, 18152, 34824, 0, 18152, 34824, 0, 18152, 34824, 0, 18560, 32768, 0, 18576, 32768, 0, 18592, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575982984537640_269_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575982984537640_269_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e88daa77 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575982984537640_269_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,318 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 
3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((265 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 
1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 468 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 1872, 43690, 0, 1872, 43690, 0, 1872, 43690, 0, 1872, 43690, 0, 1872, 43690, 0, 1872, 43690, 0, 1872, 43690, 0, 1872, 43690, 0, 1888, 43690, 0, 1888, 43690, 0, 1888, 43690, 0, 1888, 43690, 0, 1888, 43690, 0, 1888, 43690, 0, 1888, 43690, 0, 1888, 43690, 0, 4112, 8874, 0, 4112, 8874, 0, 4112, 8874, 0, 4112, 8874, 0, 4112, 8874, 0, 4112, 8874, 0, 4128, 8874, 0, 4128, 8874, 0, 4128, 8874, 0, 4128, 8874, 0, 4128, 8874, 0, 4128, 8874, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 
43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 6080, 73, 0, 6080, 73, 0, 6080, 73, 0, 10560, 8194, 0, 10560, 8194, 0, 10576, 8194, 0, 10576, 8194, 0, 11904, 18724, 0, 11904, 18724, 0, 11904, 18724, 0, 11904, 18724, 0, 11904, 18724, 0, 14016, 1, 0, 17536, 1040, 0, 17536, 1040, 0, 17856, 16644, 0, 17856, 16644, 0, 17856, 16644, 0, 18432, 21845, 0, 18432, 21845, 0, 18432, 21845, 0, 18432, 21845, 0, 18432, 21845, 0, 18432, 21845, 0, 18432, 21845, 0, 18432, 21845, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 1872, 43690, 0, 1872, 43690, 0, 1872, 43690, 0, 1872, 43690, 0, 1872, 43690, 0, 1872, 43690, 0, 1872, 43690, 0, 1872, 43690, 0, 1888, 43690, 0, 1888, 43690, 0, 1888, 43690, 0, 1888, 43690, 0, 1888, 43690, 0, 1888, 43690, 0, 1888, 43690, 0, 1888, 43690, 0, 4112, 8874, 0, 4112, 8874, 0, 4112, 8874, 0, 4112, 8874, 0, 4112, 8874, 0, 4112, 8874, 0, 4128, 8874, 0, 4128, 8874, 0, 4128, 8874, 0, 4128, 8874, 0, 4128, 8874, 0, 4128, 8874, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5264, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 5280, 43690, 0, 6080, 73, 0, 6080, 73, 0, 6080, 73, 0, 10560, 8194, 0, 10560, 8194, 0, 10576, 8194, 0, 10576, 8194, 0, 11904, 18724, 0, 11904, 18724, 0, 11904, 18724, 0, 11904, 18724, 0, 11904, 18724, 0, 14016, 1, 0, 17536, 1040, 0, 17536, 1040, 0, 17856, 16644, 0, 17856, 16644, 0, 17856, 16644, 0, 18432, 21845, 0, 18432, 21845, 0, 18432, 21845, 0, 18432, 21845, 0, 18432, 21845, 0, 18432, 21845, 0, 18432, 21845, 0, 18432, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - 
Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756575993817604151_270_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756575993817604151_270_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a157764a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756575993817604151_270_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,550 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || 
(WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((128 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((245 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i2 == 1)) { + break; + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((289 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((301 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((324 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((334 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((343 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((347 << 6) | (i4 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((354 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (358 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (371 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((401 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((419 << 6) | (counter6 << 4)) | (counter7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((434 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter6 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (442 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 14)) { + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((459 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((473 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((484 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (501 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter9 = 0; + while ((counter9 < 2)) { + counter9 = (counter9 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((515 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((533 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((544 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((553 << 
6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((560 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter9 == 1)) { + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (568 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (575 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 288 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 32769, 0, 1600, 32769, 0, 2304, 36865, 0, 2304, 36865, 0, 2304, 36865, 0, 4800, 64, 0, 13760, 9216, 0, 13760, 9216, 0, 13776, 9216, 0, 13776, 9216, 0, 15684, 1152, 0, 15684, 1152, 0, 15688, 1152, 0, 15688, 1152, 0, 15692, 1152, 0, 15692, 1152, 0, 15700, 1152, 0, 15700, 1152, 0, 15704, 1152, 0, 15704, 1152, 0, 15708, 1152, 0, 15708, 1152, 0, 17280, 2080, 0, 17280, 2080, 0, 17296, 2080, 0, 17296, 2080, 0, 20736, 16640, 0, 20736, 16640, 0, 20752, 16640, 0, 20752, 16640, 0, 21376, 4, 0, 21392, 4, 0, 23744, 1, 0, 
28288, 256, 0, 34128, 8194, 0, 34128, 8194, 0, 34832, 8194, 0, 34832, 8194, 0, 35408, 544, 0, 35408, 544, 0, 36352, 17476, 0, 36352, 17476, 0, 36352, 17476, 0, 36352, 17476, 0, 36800, 34952, 0, 36800, 34952, 0, 36800, 34952, 0, 36800, 34952, 0, 1600, 32769, 0, 1600, 32769, 0, 2304, 36865, 0, 2304, 36865, 0, 2304, 36865, 0, 4800, 64, 0, 13760, 9216, 0, 13760, 9216, 0, 13776, 9216, 0, 13776, 9216, 0, 15684, 1152, 0, 15684, 1152, 0, 15688, 1152, 0, 15688, 1152, 0, 15692, 1152, 0, 15692, 1152, 0, 15700, 1152, 0, 15700, 1152, 0, 15704, 1152, 0, 15704, 1152, 0, 15708, 1152, 0, 15708, 1152, 0, 17280, 2080, 0, 17280, 2080, 0, 17296, 2080, 0, 17296, 2080, 0, 20736, 16640, 0, 20736, 16640, 0, 20752, 16640, 0, 20752, 16640, 0, 21376, 4, 0, 21392, 4, 0, 23744, 1, 0, 28288, 256, 0, 34128, 8194, 0, 34128, 8194, 0, 34832, 8194, 0, 34832, 8194, 0, 35408, 544, 0, 35408, 544, 0, 36352, 17476, 0, 36352, 17476, 0, 36352, 17476, 0, 36352, 17476, 0, 36800, 34952, 0, 36800, 34952, 0, 36800, 34952, 0, 36800, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576018234924777_271_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576018234924777_271_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6de82df0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576018234924777_271_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,196 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((89 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((99 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((117 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((130 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1146 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3584, 127, 0, 3584, 127, 0, 3584, 127, 0, 3584, 127, 0, 3584, 127, 0, 3584, 127, 0, 3584, 127, 0, 3200, 32768, 0, 2944, 32256, 0, 2944, 32256, 0, 2944, 32256, 0, 2944, 
32256, 0, 2944, 32256, 0, 2944, 32256, 0, 4544, 31, 0, 4544, 31, 0, 4544, 31, 0, 4544, 31, 0, 4544, 31, 0, 4560, 31, 0, 4560, 31, 0, 4560, 31, 0, 4560, 31, 0, 4560, 31, 0, 4576, 31, 0, 4576, 31, 0, 4576, 31, 0, 4576, 31, 0, 4576, 31, 0, 5700, 61447, 0, 5700, 61447, 0, 5700, 61447, 0, 5700, 61447, 0, 5700, 61447, 0, 5700, 61447, 0, 5700, 61447, 0, 5704, 61447, 0, 5704, 61447, 0, 5704, 61447, 0, 5704, 61447, 0, 5704, 61447, 0, 5704, 61447, 0, 5704, 61447, 0, 5716, 61447, 0, 5716, 61447, 0, 5716, 61447, 0, 5716, 61447, 0, 5716, 61447, 0, 5716, 61447, 0, 5716, 61447, 0, 5720, 61447, 0, 5720, 61447, 0, 5720, 61447, 0, 5720, 61447, 0, 5720, 61447, 0, 5720, 61447, 0, 5720, 61447, 0, 5732, 61447, 0, 5732, 61447, 0, 5732, 61447, 0, 5732, 61447, 0, 5732, 61447, 0, 5732, 61447, 0, 5732, 61447, 0, 5736, 61447, 0, 5736, 61447, 0, 5736, 61447, 0, 5736, 61447, 0, 5736, 61447, 0, 5736, 61447, 0, 5736, 61447, 0, 6340, 73, 0, 6340, 73, 0, 6340, 73, 0, 6344, 73, 0, 6344, 73, 0, 6344, 73, 0, 6356, 73, 0, 6356, 73, 0, 6356, 73, 0, 6360, 73, 0, 6360, 73, 0, 6360, 73, 0, 6372, 73, 0, 6372, 73, 0, 6372, 73, 0, 6376, 73, 0, 6376, 73, 0, 6376, 73, 0, 6916, 1040, 0, 6916, 1040, 0, 6920, 1040, 0, 6920, 1040, 0, 6932, 1040, 0, 6932, 1040, 0, 6936, 1040, 0, 6936, 1040, 0, 6948, 1040, 0, 6948, 1040, 0, 6952, 1040, 0, 6952, 1040, 0, 7236, 18724, 0, 7236, 18724, 0, 7236, 18724, 0, 7236, 18724, 0, 7236, 18724, 0, 7240, 18724, 0, 7240, 18724, 0, 7240, 18724, 0, 7240, 18724, 0, 7240, 18724, 0, 7252, 18724, 0, 7252, 18724, 0, 7252, 18724, 0, 7252, 18724, 0, 7252, 18724, 0, 7256, 18724, 0, 7256, 18724, 0, 7256, 18724, 0, 7256, 18724, 0, 7256, 18724, 0, 7268, 18724, 0, 7268, 18724, 0, 7268, 18724, 0, 7268, 18724, 0, 7268, 18724, 0, 7272, 18724, 0, 7272, 18724, 0, 7272, 18724, 0, 7272, 18724, 0, 7272, 18724, 0, 8324, 57375, 0, 8324, 57375, 0, 8324, 57375, 0, 8324, 57375, 0, 8324, 57375, 0, 8324, 57375, 0, 8324, 57375, 0, 8324, 57375, 0, 8328, 57375, 0, 8328, 57375, 0, 8328, 57375, 0, 8328, 57375, 0, 
8328, 57375, 0, 8328, 57375, 0, 8328, 57375, 0, 8328, 57375, 0, 8340, 57375, 0, 8340, 57375, 0, 8340, 57375, 0, 8340, 57375, 0, 8340, 57375, 0, 8340, 57375, 0, 8340, 57375, 0, 8340, 57375, 0, 8344, 57375, 0, 8344, 57375, 0, 8344, 57375, 0, 8344, 57375, 0, 8344, 57375, 0, 8344, 57375, 0, 8344, 57375, 0, 8344, 57375, 0, 8356, 57375, 0, 8356, 57375, 0, 8356, 57375, 0, 8356, 57375, 0, 8356, 57375, 0, 8356, 57375, 0, 8356, 57375, 0, 8356, 57375, 0, 8360, 57375, 0, 8360, 57375, 0, 8360, 57375, 0, 8360, 57375, 0, 8360, 57375, 0, 8360, 57375, 0, 8360, 57375, 0, 8360, 57375, 0, 8896, 15, 0, 8896, 15, 0, 8896, 15, 0, 8896, 15, 0, 8912, 15, 0, 8912, 15, 0, 8912, 15, 0, 8912, 15, 0, 8928, 15, 0, 8928, 15, 0, 8928, 15, 0, 8928, 15, 0, 3584, 127, 0, 3584, 127, 0, 3584, 127, 0, 3584, 127, 0, 3584, 127, 0, 3584, 127, 0, 3584, 127, 0, 3200, 32768, 0, 2944, 32256, 0, 2944, 32256, 0, 2944, 32256, 0, 2944, 32256, 0, 2944, 32256, 0, 2944, 32256, 0, 4544, 31, 0, 4544, 31, 0, 4544, 31, 0, 4544, 31, 0, 4544, 31, 0, 4560, 31, 0, 4560, 31, 0, 4560, 31, 0, 4560, 31, 0, 4560, 31, 0, 4576, 31, 0, 4576, 31, 0, 4576, 31, 0, 4576, 31, 0, 4576, 31, 0, 5700, 61447, 0, 5700, 61447, 0, 5700, 61447, 0, 5700, 61447, 0, 5700, 61447, 0, 5700, 61447, 0, 5700, 61447, 0, 5704, 61447, 0, 5704, 61447, 0, 5704, 61447, 0, 5704, 61447, 0, 5704, 61447, 0, 5704, 61447, 0, 5704, 61447, 0, 5716, 61447, 0, 5716, 61447, 0, 5716, 61447, 0, 5716, 61447, 0, 5716, 61447, 0, 5716, 61447, 0, 5716, 61447, 0, 5720, 61447, 0, 5720, 61447, 0, 5720, 61447, 0, 5720, 61447, 0, 5720, 61447, 0, 5720, 61447, 0, 5720, 61447, 0, 5732, 61447, 0, 5732, 61447, 0, 5732, 61447, 0, 5732, 61447, 0, 5732, 61447, 0, 5732, 61447, 0, 5732, 61447, 0, 5736, 61447, 0, 5736, 61447, 0, 5736, 61447, 0, 5736, 61447, 0, 5736, 61447, 0, 5736, 61447, 0, 5736, 61447, 0, 6340, 73, 0, 6340, 73, 0, 6340, 73, 0, 6344, 73, 0, 6344, 73, 0, 6344, 73, 0, 6356, 73, 0, 6356, 73, 0, 6356, 73, 0, 6360, 73, 0, 6360, 73, 0, 6360, 73, 0, 6372, 73, 0, 6372, 73, 0, 6372, 
73, 0, 6376, 73, 0, 6376, 73, 0, 6376, 73, 0, 6916, 1040, 0, 6916, 1040, 0, 6920, 1040, 0, 6920, 1040, 0, 6932, 1040, 0, 6932, 1040, 0, 6936, 1040, 0, 6936, 1040, 0, 6948, 1040, 0, 6948, 1040, 0, 6952, 1040, 0, 6952, 1040, 0, 7236, 18724, 0, 7236, 18724, 0, 7236, 18724, 0, 7236, 18724, 0, 7236, 18724, 0, 7240, 18724, 0, 7240, 18724, 0, 7240, 18724, 0, 7240, 18724, 0, 7240, 18724, 0, 7252, 18724, 0, 7252, 18724, 0, 7252, 18724, 0, 7252, 18724, 0, 7252, 18724, 0, 7256, 18724, 0, 7256, 18724, 0, 7256, 18724, 0, 7256, 18724, 0, 7256, 18724, 0, 7268, 18724, 0, 7268, 18724, 0, 7268, 18724, 0, 7268, 18724, 0, 7268, 18724, 0, 7272, 18724, 0, 7272, 18724, 0, 7272, 18724, 0, 7272, 18724, 0, 7272, 18724, 0, 8324, 57375, 0, 8324, 57375, 0, 8324, 57375, 0, 8324, 57375, 0, 8324, 57375, 0, 8324, 57375, 0, 8324, 57375, 0, 8324, 57375, 0, 8328, 57375, 0, 8328, 57375, 0, 8328, 57375, 0, 8328, 57375, 0, 8328, 57375, 0, 8328, 57375, 0, 8328, 57375, 0, 8328, 57375, 0, 8340, 57375, 0, 8340, 57375, 0, 8340, 57375, 0, 8340, 57375, 0, 8340, 57375, 0, 8340, 57375, 0, 8340, 57375, 0, 8340, 57375, 0, 8344, 57375, 0, 8344, 57375, 0, 8344, 57375, 0, 8344, 57375, 0, 8344, 57375, 0, 8344, 57375, 0, 8344, 57375, 0, 8344, 57375, 0, 8356, 57375, 0, 8356, 57375, 0, 8356, 57375, 0, 8356, 57375, 0, 8356, 57375, 0, 8356, 57375, 0, 8356, 57375, 0, 8356, 57375, 0, 8360, 57375, 0, 8360, 57375, 0, 8360, 57375, 0, 8360, 57375, 0, 8360, 57375, 0, 8360, 57375, 0, 8360, 57375, 0, 8360, 57375, 0, 8896, 15, 0, 8896, 15, 0, 8896, 15, 0, 8896, 15, 0, 8912, 15, 0, 8912, 15, 0, 8912, 15, 0, 8912, 15, 0, 8928, 15, 0, 8928, 15, 0, 8928, 15, 0, 8928, 15, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + 
Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576068807573175_273_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576068807573175_273_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..98b04927 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576068807573175_273_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,305 @@
+#--- source.hlsl
+RWStructuredBuffer _participant_bit : register(u0); // Output log, written three uints at a time below. NOTE(review): no element type argument (e.g. <uint>) is visible here; it may have been lost in extraction - confirm.
+RWStructuredBuffer _wave_op_index : register(u1); // Element 0 is the shared write cursor advanced by InterlockedAdd below.
+// Every taken branch runs one wave intrinsic and then appends a 3-uint record: [tag, ballot.x, ballot.y].
+[numthreads(32, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) {
+  uint result = 0; // Accumulates wave-op results; it is never stored to a buffer in this function.
+  if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 3))) {
+    if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 7))) { // Disjoint from the enclosing lane set {0,3,6,8,14}: no lane gets here (tag 41 << 6 = 2624 is absent from the expected data).
+      result = (result + WaveActiveSum(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve three consecutive slots; temp receives the cursor value before the add.
+      _participant_bit[temp] = (41 << 6); // Slot 0: record tag, a per-site constant shifted left by 6.
+      uint4 ballot = WaveActiveBallot(1); // Bit mask of the lanes executing this statement together.
+      _participant_bit[(temp + 1)] = ballot.x; // Slot 1: low 32 bits of the mask.
+      _participant_bit[(temp + 2)] = ballot.y; // Slot 2: next 32 bits of the mask.
+    }
+    if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { // Also disjoint from {0,3,6,8,14}, so the loop and the tag-98 site below are never reached.
+      uint counter0 = 0;
+      while ((counter0 < 3)) {
+        counter0 = (counter0 + 1);
+        if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10))) {
+          result = (result + WaveActiveMax((WaveGetLaneIndex() + 5)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((76 << 6) | (counter0 << 4)); // Loop sites also fold the (already incremented) counter into bits 4 and up.
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((counter0 == 2)) {
+          break;
+        }
+      }
+      if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) {
+        result = (result + WaveActiveMin(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (98 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 12))) { // Lane 8 is the only lane in both this set and the enclosing one.
+      result = (result + WaveActiveSum(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (121 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+  if (((WaveGetLaneIndex() & 1) == 0)) { // Even lanes only.
+    if (((WaveGetLaneIndex() & 1) == 0)) { // Same test as the enclosing branch, so always true here.
+      result = (result + WaveActiveSum(WaveGetLaneIndex()));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (135 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    if ((WaveGetLaneIndex() == 14)) {
+      if ((WaveGetLaneIndex() == 8)) { // Contradicts the enclosing lane == 14 test: never taken.
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (145 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      switch ((WaveGetLaneIndex() % 3)) { // Only lane 14 is here and 14 % 3 == 2, so case 2 is the one that runs.
+        case 0: {
+          if ((WaveGetLaneIndex() < 8)) {
+            result = (result + WaveActiveSum(1));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (155 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 1: {
+          if (((WaveGetLaneIndex() % 2) == 0)) {
+            result = (result + WaveActiveSum(2));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (164 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 2: {
+          if (true) {
+            result = (result + WaveActiveSum(3));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (169 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        default: { // x % 3 is always 0, 1 or 2, so this arm cannot be selected.
+          result = (result + WaveActiveSum(99));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (173 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+          break;
+        }
+      }
+    }
+    if (((WaveGetLaneIndex() & 1) == 0)) {
+      result = (result + WaveActiveSum(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (182 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+  switch ((WaveGetLaneIndex() % 3)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (192 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (201 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 2: {
+      if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { // 0, 9 and 13 leave remainders 0, 0 and 1 modulo 3, so no lane that reached case 2 passes this test.
+        if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12))) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (223 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        switch ((WaveGetLaneIndex() % 4)) {
+          case 0: {
+            if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) {
+              if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) {
+                result = (result + WaveActiveMax(6));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (244 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+            } else {
+              if ((WaveGetLaneIndex() < 4)) {
+                result = (result + WaveActiveSum(7));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (251 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              if ((WaveGetLaneIndex() < 2)) {
+                result = (result + WaveActiveSum(WaveGetLaneIndex()));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (258 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+            }
+            break;
+          }
+          case 1: {
+            if (((WaveGetLaneIndex() % 2) == 0)) {
+              result = (result + WaveActiveSum(2));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (267 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+          case 2: {
+            if (true) {
+              result = (result + WaveActiveSum(3));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (272 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+          case 3: {
+            for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) {
+              if ((WaveGetLaneIndex() == 11)) {
+                result = (result + WaveActiveMin(result));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = ((287 << 6) | (i1 << 4));
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              if ((WaveGetLaneIndex() == 10)) {
+                result = (result + WaveActiveSum(result));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = ((294 << 6) | (i1 << 4));
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+            }
+            break;
+          }
+        }
+      }
+      break;
+    }
+  }
+}
+ +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7744, 256, 0, 8640, 21845, 0, 8640, 21845, 0, 8640, 21845, 0, 8640, 21845, 0, 8640, 21845, 0, 8640, 21845, 0, 8640, 21845, 0, 8640, 21845, 0, 10816, 16384, 0, 11648, 5461, 0, 11648, 5461, 0, 11648, 5461, 0, 11648, 5461, 0, 11648, 5461, 0, 11648, 5461, 0, 11648, 5461, 0, 11648, 16384, 0, 12288, 73, 0, 12288, 73, 0, 12288, 73, 0, 12864, 1040, 0, 12864, 1040, 0, 7744, 256, 0, 8640, 21845, 0, 8640, 21845, 0, 8640, 21845, 0, 8640, 21845, 0, 8640, 21845, 0, 8640, 21845, 0, 8640, 21845, 0, 8640, 21845, 0, 10816, 16384, 0, 11648, 5461, 0, 11648, 5461, 0, 11648, 5461, 0, 11648, 5461, 0, 11648, 5461, 0, 11648, 5461, 0, 11648, 5461, 0, 11648, 16384, 0, 12288, 73, 0, 12288, 73, 0, 12288,
73, 0, 12864, 1040, 0, 12864, 1040, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576069133495330_274_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576069133495330_274_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..81238ae2 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576069133495330_274_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,350 @@
+#--- source.hlsl
+RWStructuredBuffer _participant_bit : register(u0); // Output log, written three uints at a time below. NOTE(review): no element type argument (e.g. <uint>) is visible here; it may have been lost in extraction - confirm.
+RWStructuredBuffer _wave_op_index : register(u1); // Element 0 is the shared write cursor advanced by InterlockedAdd below.
+// Every taken branch runs one wave intrinsic and then appends a 3-uint record: [tag, ballot.x, ballot.y].
+[numthreads(32, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) {
+  uint result = 0; // Accumulates wave-op results; it is never stored to a buffer in this function.
+  if (((WaveGetLaneIndex() & 1) == 1)) { // Odd lanes.
+    result = (result + WaveActiveSum((WaveGetLaneIndex() + 1)));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve three consecutive slots; temp receives the cursor value before the add.
+    _participant_bit[temp] = (17 << 6); // Slot 0: record tag, a per-site constant shifted left by 6.
+    uint4 ballot = WaveActiveBallot(1); // Bit mask of the lanes executing this statement together.
+    _participant_bit[(temp + 1)] = ballot.x; // Slot 1: low 32 bits of the mask.
+    _participant_bit[(temp + 2)] = ballot.y; // Slot 2: next 32 bits of the mask.
+  } else {
+    if ((WaveGetLaneIndex() == 15)) { // Lane 15 is odd and therefore never in this else branch: not reachable.
+      result = (result + WaveActiveMin(2));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (11 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+  if ((WaveGetLaneIndex() == 15)) {
+    if ((WaveGetLaneIndex() == 4)) { // Contradicts the enclosing lane == 15 test: never taken.
+      result = (result + WaveActiveSum(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (27 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    switch ((WaveGetLaneIndex() % 4)) { // Only lane 15 is here and 15 % 4 == 3, so case 3 is the one that runs.
+      case 0: {
+        if ((WaveGetLaneIndex() < 8)) {
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (37 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 1: {
+        switch ((WaveGetLaneIndex() % 3)) {
+          case 0: {
+            if ((WaveGetLaneIndex() < 8)) {
+              result = (result + WaveActiveSum(1));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (47 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+          case 1: {
+            for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) {
+              if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11))) {
+                result = (result + WaveActiveMax(result));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = ((66 << 6) | (i0 << 4)); // Loop sites also fold the loop counter into bits 4 and up.
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              if ((i0 == 1)) { // Last statement of the loop body, so this continue skips nothing.
+                continue;
+              }
+            }
+            break;
+          }
+          case 2: {
+            if (true) {
+              result = (result + WaveActiveSum(3));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (74 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+        }
+        break;
+      }
+      case 2: {
+        uint counter1 = 0;
+        while ((counter1 < 3)) {
+          counter1 = (counter1 + 1);
+          if ((WaveGetLaneIndex() == 13)) {
+            if ((WaveGetLaneIndex() == 0)) {
+              result = (result + WaveActiveSum(5));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = ((91 << 6) | (counter1 << 4));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          } else {
+            if ((WaveGetLaneIndex() < 2)) {
+              result = (result + WaveActiveSum(5));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = ((98 << 6) | (counter1 << 4));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          if ((WaveGetLaneIndex() < 8)) {
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((105 << 6) | (counter1 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((counter1 == 2)) {
+            break;
+          }
+        }
+        break;
+      }
+      case 3: {
+        if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { // Lane 15 is not in this set, so it takes the else branch.
+          if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 14))) {
+            result = (result + WaveActiveMax(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (142 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) {
+            if ((WaveGetLaneIndex() < 7)) {
+              result = (result + WaveActiveMin(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = ((157 << 6) | (i2 << 4));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            if ((i2 == 1)) {
+              continue;
+            }
+          }
+          if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 0))) {
+            result = (result + WaveActiveSum(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (183 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        } else {
+          if ((WaveGetLaneIndex() == 14)) {
+            result = (result + WaveActiveMax(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (190 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          uint counter3 = 0;
+          while ((counter3 < 3)) {
+            counter3 = (counter3 + 1);
+            if ((WaveGetLaneIndex() >= 10)) {
+              result = (result + WaveActiveSum(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = ((204 << 6) | (counter3 << 4)); // counter3 is already incremented, so the tags carry 1, 2 and 3.
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+        }
+        break;
+      }
+    }
+  }
+  if (((WaveGetLaneIndex() & 1) == 0)) { // Even lanes only.
+    if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14))) { // Lane 7 is odd, so only lane 14 can enter.
+      if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) {
+        result = (result + WaveActiveSum((WaveGetLaneIndex() + 4)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (237 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      switch ((WaveGetLaneIndex() % 4)) {
+        case 0: { // NOTE(review): this case has no break, so control falls through into case 1 - confirm the generator intends that.
+          if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) {
+            if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) {
+              result = (result + WaveActiveSum(WaveGetLaneIndex()));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (258 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+        }
+        case 1: {
+          uint counter4 = 0;
+          while ((counter4 < 2)) {
+            counter4 = (counter4 + 1);
+            if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) {
+              result = (result + WaveActiveSum((WaveGetLaneIndex() + 2)));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = ((278 << 6) | (counter4 << 4));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            if ((counter4 == 1)) {
+              break;
+            }
+          }
+          break;
+        }
+        case 2: {
+          if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) {
+            if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) {
+              result = (result + WaveActiveMin((WaveGetLaneIndex() + 1)));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (321 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 6))) {
+              result = (result + WaveActiveMax((WaveGetLaneIndex() + 1)));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (346 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          break;
+        }
+        case 3: {
+          if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) {
+            if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) {
+              result = (result + WaveActiveMax(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (364 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          break;
+        }
+        default: { // x % 4 is always 0..3, so this arm cannot be selected.
+          result = (result + WaveActiveSum(99));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (368 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+          break;
+        }
+      }
+    }
+    if (((WaveGetLaneIndex() & 1) == 0)) { // Same test as the enclosing branch, so always true here.
+      result = (result + WaveActiveMax((WaveGetLaneIndex() + 4)));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (379 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+}
+ +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 13072, 32768, 0, 13088, 32768, 0, 13104, 32768, 0, 24256, 5461, 0, 24256, 5461, 0, 24256, 5461, 0, 24256, 5461, 0, 24256, 5461, 0, 24256, 5461, 0, 24256, 5461, 0, 24256, 16384, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 13072, 32768, 0, 13088, 32768, 0, 13104, 32768, 0, 24256, 5461, 0, 24256, 5461, 0, 24256, 5461, 0, 24256, 5461, 0, 24256, 5461, 0, 24256, 5461, 0, 24256, 5461, 0, 24256, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected:
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576069374510058_275_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576069374510058_275_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3017420b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576069374510058_275_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,125 @@
+#--- source.hlsl
+RWStructuredBuffer _participant_bit : register(u0); // Output log, written three uints at a time below. NOTE(review): no element type argument (e.g. <uint>) is visible here; it may have been lost in extraction - confirm.
+RWStructuredBuffer _wave_op_index : register(u1); // Element 0 is the shared write cursor advanced by InterlockedAdd below.
+// Every taken branch runs one wave intrinsic and then appends a 3-uint record: [tag, ballot.x, ballot.y].
+[numthreads(32, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) {
+  uint result = 0; // Accumulates wave-op results; it is never stored to a buffer in this function.
+  for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) {
+    if ((WaveGetLaneIndex() < 1)) { // Lane 0 only.
+      if ((WaveGetLaneIndex() >= 14)) { // Contradicts the enclosing lane < 1 test: never taken.
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve three consecutive slots; temp receives the cursor value before the add.
+        _participant_bit[temp] = ((17 << 6) | (i0 << 4)); // Slot 0: record tag, a per-site constant shifted left by 6, OR'ed with the outer loop counter.
+        uint4 ballot = WaveActiveBallot(1); // Bit mask of the lanes executing this statement together.
+        _participant_bit[(temp + 1)] = ballot.x; // Slot 1: low 32 bits of the mask.
+        _participant_bit[(temp + 2)] = ballot.y; // Slot 2: next 32 bits of the mask.
+      }
+      for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) {
+        for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) {
+          if ((WaveGetLaneIndex() >= 11)) { // Also excluded by the enclosing lane < 1 test.
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((((40 << 6) | (i0 << 4)) | (i1 << 2)) | i2);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((WaveGetLaneIndex() < 1)) {
+            result = (result + WaveActiveSum(8));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((((47 << 6) | (i0 << 4)) | (i1 << 2)) | i2); // Three nesting levels: i0 at bit 4, i1 at bit 2, i2 at bit 0.
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((i2 == 1)) { // Last statement of the loop body, so this continue skips nothing.
+            continue;
+          }
+        }
+        if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) {
+          result = (result + WaveActiveMax(8));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((61 << 6) | (i0 << 4)) | (i1 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((i1 == 2)) { // i1 == 2 is already the final iteration of this loop.
+          break;
+        }
+      }
+      if ((WaveGetLaneIndex() < 1)) {
+        result = (result + WaveActiveMax((WaveGetLaneIndex() + 1)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((73 << 6) | (i0 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) {
+      result = (result + WaveActiveMax(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = ((84 << 6) | (i0 << 4));
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+}
+ +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 252 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3008, 1, 0, 3009, 1, 0, 3010, 1, 0, 3012, 1, 0, 3013, 1, 0, 3014, 1, 0, 3016, 1, 0, 3017, 1, 0, 3018, 1, 0, 3024, 1, 0, 3025, 1, 0, 3026, 1, 0, 3028, 1, 0, 3029, 1, 0, 3030, 1, 0, 3032, 1, 0, 3033, 1, 0, 3034, 1, 0, 3904, 1, 0, 3908, 1, 0, 3912, 1, 0, 3920, 1, 0, 3924, 1, 0, 3928, 1, 0, 4672, 1, 0, 4688,
1, 0, 5376, 57375, 0, 5376, 57375, 0, 5376, 57375, 0, 5376, 57375, 0, 5376, 57375, 0, 5376, 57375, 0, 5376, 57375, 0, 5376, 57375, 0, 5392, 57375, 0, 5392, 57375, 0, 5392, 57375, 0, 5392, 57375, 0, 5392, 57375, 0, 5392, 57375, 0, 5392, 57375, 0, 5392, 57375, 0, 3008, 1, 0, 3009, 1, 0, 3010, 1, 0, 3012, 1, 0, 3013, 1, 0, 3014, 1, 0, 3016, 1, 0, 3017, 1, 0, 3018, 1, 0, 3024, 1, 0, 3025, 1, 0, 3026, 1, 0, 3028, 1, 0, 3029, 1, 0, 3030, 1, 0, 3032, 1, 0, 3033, 1, 0, 3034, 1, 0, 3904, 1, 0, 3908, 1, 0, 3912, 1, 0, 3920, 1, 0, 3924, 1, 0, 3928, 1, 0, 4672, 1, 0, 4688, 1, 0, 5376, 57375, 0, 5376, 57375, 0, 5376, 57375, 0, 5376, 57375, 0, 5376, 57375, 0, 5376, 57375, 0, 5376, 57375, 0, 5376, 57375, 0, 5392, 57375, 0, 5392, 57375, 0, 5392, 57375, 0, 5392, 57375, 0, 5392, 57375, 0, 5392, 57375, 0, 5392, 57375, 0, 5392, 57375, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576070542935107_276_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576070542935107_276_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1b25dce5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576070542935107_276_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,335 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if
((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((165 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((265 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((285 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((300 << 6) | (i3 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((307 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((311 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((320 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 9))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((369 << 6) | (i3 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if 
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((384 << 6) | (i3 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((397 << 6) | (i3 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 510 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7040, 4, 0, 12864, 32768, 0, 13440, 1040, 0, 13440, 1040, 0, 13760, 18724, 0, 13760, 18724, 0, 13760, 18724, 0, 13760, 18724, 0, 13760, 18724, 0, 14720, 32768, 0, 14736, 32768, 0, 19648, 128, 0, 19664, 128, 0, 24580, 66, 0, 24580, 66, 0, 24584, 66, 0, 24584, 66, 0, 24588, 66, 0, 24588, 66, 0, 24596, 66, 0, 24596, 66, 0, 24600, 66, 0, 24600, 66, 0, 24604, 66, 0, 24604, 66, 0, 25412, 64527, 0, 25412, 64527, 0, 25412, 64527, 0, 25412, 64527, 0, 25412, 64527, 0, 25412, 64527, 0, 25412, 64527, 0, 25412, 64527, 0, 25412, 64527, 0, 25412, 64527, 0, 25416, 64527, 0, 25416, 64527, 0, 25416, 64527, 0, 25416, 64527, 0, 25416, 64527, 0, 25416, 64527, 0, 25416, 64527, 0, 25416, 64527, 0, 25416, 64527, 0, 25416, 64527, 0, 25420, 64527, 0, 25420, 64527, 0, 25420, 64527, 0, 25420, 64527, 0, 25420, 64527, 0, 25420, 64527, 0, 25420, 64527, 0, 25420, 64527, 0, 25420, 64527, 0, 25420, 64527, 0, 25428, 
64527, 0, 25428, 64527, 0, 25428, 64527, 0, 25428, 64527, 0, 25428, 64527, 0, 25428, 64527, 0, 25428, 64527, 0, 25428, 64527, 0, 25428, 64527, 0, 25428, 64527, 0, 25432, 64527, 0, 25432, 64527, 0, 25432, 64527, 0, 25432, 64527, 0, 25432, 64527, 0, 25432, 64527, 0, 25432, 64527, 0, 25432, 64527, 0, 25432, 64527, 0, 25432, 64527, 0, 25436, 64527, 0, 25436, 64527, 0, 25436, 64527, 0, 25436, 64527, 0, 25436, 64527, 0, 25436, 64527, 0, 25436, 64527, 0, 25436, 64527, 0, 25436, 64527, 0, 25436, 64527, 0, 7040, 4, 0, 12864, 32768, 0, 13440, 1040, 0, 13440, 1040, 0, 13760, 18724, 0, 13760, 18724, 0, 13760, 18724, 0, 13760, 18724, 0, 13760, 18724, 0, 14720, 32768, 0, 14736, 32768, 0, 19648, 128, 0, 19664, 128, 0, 24580, 66, 0, 24580, 66, 0, 24584, 66, 0, 24584, 66, 0, 24588, 66, 0, 24588, 66, 0, 24596, 66, 0, 24596, 66, 0, 24600, 66, 0, 24600, 66, 0, 24604, 66, 0, 24604, 66, 0, 25412, 64527, 0, 25412, 64527, 0, 25412, 64527, 0, 25412, 64527, 0, 25412, 64527, 0, 25412, 64527, 0, 25412, 64527, 0, 25412, 64527, 0, 25412, 64527, 0, 25412, 64527, 0, 25416, 64527, 0, 25416, 64527, 0, 25416, 64527, 0, 25416, 64527, 0, 25416, 64527, 0, 25416, 64527, 0, 25416, 64527, 0, 25416, 64527, 0, 25416, 64527, 0, 25416, 64527, 0, 25420, 64527, 0, 25420, 64527, 0, 25420, 64527, 0, 25420, 64527, 0, 25420, 64527, 0, 25420, 64527, 0, 25420, 64527, 0, 25420, 64527, 0, 25420, 64527, 0, 25420, 64527, 0, 25428, 64527, 0, 25428, 64527, 0, 25428, 64527, 0, 25428, 64527, 0, 25428, 64527, 0, 25428, 64527, 0, 25428, 64527, 0, 25428, 64527, 0, 25428, 64527, 0, 25428, 64527, 0, 25432, 64527, 0, 25432, 64527, 0, 25432, 64527, 0, 25432, 64527, 0, 25432, 64527, 0, 25432, 64527, 0, 25432, 64527, 0, 25432, 64527, 0, 25432, 64527, 0, 25432, 64527, 0, 25436, 64527, 0, 25436, 64527, 0, 25436, 64527, 0, 25436, 64527, 0, 25436, 64527, 0, 25436, 64527, 0, 25436, 64527, 0, 25436, 64527, 0, 25436, 64527, 0, 25436, 64527, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576141263710341_277_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576141263710341_277_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9b4c6ad5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576141263710341_277_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,171 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((39 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)]
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i0 == 1)) { + continue; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 240 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 49183, 0, 1088, 49183, 0, 1088, 49183, 0, 1088, 49183, 0, 1088, 49183, 0, 1088, 49183, 0, 1088, 49183, 0, 2500, 31, 0, 2500, 31, 0, 2500, 31, 0, 2500, 31, 0, 2500, 31, 0, 2504, 31, 0, 2504, 31, 0, 2504, 31, 0, 2504, 31, 0, 2504, 31, 0, 2516, 31, 0, 2516, 31, 0, 2516, 31, 0, 2516, 31, 0, 2516, 31, 0, 2520, 31, 0, 2520, 31, 0, 2520, 31, 0, 2520, 31, 0, 2520, 31, 0, 5060, 16389, 0, 5060, 16389, 0, 5060, 16389, 0, 5064, 16389, 0, 5064, 16389, 0, 5064, 16389, 0, 5076, 16389, 0, 5076, 16389, 0, 5076, 16389, 0, 5080, 16389, 0, 5080, 16389, 0, 5080, 16389, 0, 6720, 64, 0, 1088, 49183, 0, 1088, 49183, 0, 1088, 49183, 0, 1088, 49183, 0, 1088, 49183, 0, 
1088, 49183, 0, 1088, 49183, 0, 2500, 31, 0, 2500, 31, 0, 2500, 31, 0, 2500, 31, 0, 2500, 31, 0, 2504, 31, 0, 2504, 31, 0, 2504, 31, 0, 2504, 31, 0, 2504, 31, 0, 2516, 31, 0, 2516, 31, 0, 2516, 31, 0, 2516, 31, 0, 2516, 31, 0, 2520, 31, 0, 2520, 31, 0, 2520, 31, 0, 2520, 31, 0, 2520, 31, 0, 5060, 16389, 0, 5060, 16389, 0, 5060, 16389, 0, 5064, 16389, 0, 5064, 16389, 0, 5064, 16389, 0, 5076, 16389, 0, 5076, 16389, 0, 5076, 16389, 0, 5080, 16389, 0, 5080, 16389, 0, 5080, 16389, 0, 6720, 64, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576143481099712_278_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576143481099712_278_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d577eb4f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576143481099712_278_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,123 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((13 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 10))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; +
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((88 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [848, 4, 0, 2576, 5185, 0, 2576, 5185, 0, 2576, 5185, 0, 2576, 5185, 0, 2580, 5185, 0, 2580, 5185, 0, 2580, 5185, 0, 2580, 5185, 0, 4752, 32, 0, 4756, 32, 0, 5200, 4096, 0, 5204, 4096, 0, 5648, 64, 0, 5652, 64, 0, 6096, 4, 0, 848, 4, 0, 2576, 5185, 0, 2576, 5185, 0, 2576, 5185, 0, 2576, 5185, 0, 2580, 5185, 0, 2580, 5185, 0, 2580, 5185, 0, 2580, 5185, 0, 4752, 32, 0, 4756, 32, 0, 
5200, 4096, 0, 5204, 4096, 0, 5648, 64, 0, 5652, 64, 0, 6096, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576144410881887_279_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576144410881887_279_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d27e8a67 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576144410881887_279_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,222 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; +
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() < 3) || 
(WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 4, 0, 8064, 2, 0, 9216, 32768, 0, 9232, 32768, 0, 9248, 32768, 0, 9792, 1040, 0, 9792, 1040, 0, 10112, 18724, 0, 10112, 18724, 0, 10112, 18724, 0, 10112, 18724, 0, 10112, 18724, 0, 11968, 6, 0, 11968, 6, 0, 1856, 4, 0, 8064, 2, 0, 9216, 32768, 0, 9232, 32768, 0, 9248, 32768, 0, 9792, 1040, 0, 9792, 1040, 0, 10112, 18724, 0, 10112, 18724, 0, 10112, 18724, 0, 10112, 18724, 0, 10112, 18724, 0, 11968, 6, 0, 11968, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + 
VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576147071279985_281_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576147071279985_281_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c41aa3ca --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576147071279985_281_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,109 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 11)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14))) { + 
result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1488, 4164, 0, 1488, 4164, 0, 1488, 4164, 0, 1504, 4164, 0, 1504, 4164, 0, 1504, 4164, 0, 1488, 4164, 0, 1488, 4164, 0, 1488, 4164, 0, 1504, 4164, 0, 1504, 4164, 0, 1504, 4164, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576201617520276_283_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576201617520276_283_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..232d952b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576201617520276_283_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,330 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 10)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } 
else { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: 
{ + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 12)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((243 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((269 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3456, 1, 0, 10624, 17476, 0, 10624, 17476, 0, 10624, 17476, 0, 10624, 17476, 0, 11072, 34952, 0, 11072, 34952, 0, 11072, 34952, 0, 11072, 34952, 0, 12432, 4096, 0, 12448, 4096, 0, 17680, 4096, 0, 17696, 4096, 0, 3456, 1, 0, 10624, 17476, 0, 10624, 17476, 0, 10624, 17476, 0, 10624, 17476, 0, 11072, 34952, 0, 11072, 34952, 0, 11072, 34952, 0, 11072, 34952, 0, 12432, 4096, 0, 12448, 4096, 0, 17680, 4096, 0, 17696, 4096, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + 
Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576203284493403_285_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576203284493403_285_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..60b0b77b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576203284493403_285_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,399 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10))) 
{ + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((85 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((102 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 11)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((282 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((295 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + if ((i4 == 2)) { + break; + } + } + break; + } + } + } + case 2: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((328 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 1)) { + break; + } + } + } + break; + } + case 3: 
{ + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (341 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((361 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((376 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (381 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (385 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 
threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4224, 16384, 0, 4240, 16384, 0, 12160, 4096, 0, 14976, 4097, 0, 14976, 4097, 0, 15680, 4097, 0, 15680, 4097, 0, 16848, 8192, 0, 16864, 8192, 0, 20992, 24579, 0, 20992, 24579, 0, 20992, 24579, 0, 20992, 24579, 0, 20996, 24579, 0, 20996, 24579, 0, 20996, 24579, 0, 20996, 24579, 0, 21008, 24579, 0, 21008, 24579, 0, 21008, 24579, 0, 21008, 24579, 0, 21012, 24579, 0, 21012, 24579, 0, 21012, 24579, 0, 21012, 24579, 0, 21024, 24579, 0, 21024, 24579, 0, 21024, 24579, 0, 21024, 24579, 0, 21028, 24579, 0, 21028, 24579, 0, 21028, 24579, 0, 21028, 24579, 0, 21824, 8, 0, 24384, 2048, 0, 576, 17, 0, 576, 17, 0, 4224, 16384, 0, 4240, 16384, 0, 12160, 4096, 0, 14976, 4097, 0, 14976, 4097, 0, 15680, 4097, 0, 15680, 4097, 0, 16848, 8192, 0, 16864, 8192, 0, 20992, 24579, 0, 20992, 24579, 0, 20992, 24579, 0, 20992, 24579, 0, 20996, 24579, 0, 20996, 24579, 0, 20996, 24579, 0, 20996, 24579, 0, 21008, 24579, 0, 21008, 24579, 0, 21008, 24579, 0, 21008, 24579, 0, 21012, 24579, 0, 21012, 24579, 0, 21012, 24579, 0, 21012, 24579, 0, 21024, 24579, 0, 21024, 24579, 0, 21024, 24579, 0, 21024, 24579, 0, 21028, 24579, 0, 21028, 24579, 0, 21028, 24579, 0, 21028, 24579, 0, 21824, 8, 0, 24384, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576255150836371_286_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576255150836371_286_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87ea2c4a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576255150836371_286_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,116 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((90 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((107 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1616, 49409, 0, 1616, 49409, 0, 1616, 49409, 0, 1616, 49409, 0, 2900, 528, 0, 2900, 528, 0, 2904, 528, 0, 2904, 528, 0, 5780, 18, 0, 5780, 18, 0, 5784, 18, 0, 5784, 18, 0, 6868, 8264, 0, 6868, 8264, 0, 6868, 8264, 0, 6872, 8264, 0, 6872, 8264, 0, 6872, 8264, 0, 1616, 49409, 0, 1616, 49409, 0, 1616, 49409, 0, 1616, 49409, 0, 2900, 528, 0, 2900, 528, 0, 2904, 528, 0, 2904, 528, 0, 5780, 18, 0, 5780, 18, 0, 5784, 18, 0, 5784, 18, 0, 6868, 8264, 0, 6868, 8264, 0, 6868, 8264, 0, 6872, 8264, 0, 6872, 8264, 
0, 6872, 8264, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576260361264197_288_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576260361264197_288_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4a9387bb --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576260361264197_288_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,240 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if 
(true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 8))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 450 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 2384, 16384, 0, 2400, 
16384, 0, 2416, 16384, 0, 4560, 512, 0, 4576, 512, 0, 4592, 512, 0, 5824, 4161, 0, 5824, 4161, 0, 5824, 4161, 0, 5840, 4161, 0, 5840, 4161, 0, 5840, 4161, 0, 7296, 32769, 0, 7296, 32769, 0, 7312, 32769, 0, 7312, 32769, 0, 8000, 32769, 0, 8000, 32769, 0, 8016, 32769, 0, 8016, 32769, 0, 8960, 45056, 0, 8960, 45056, 0, 8960, 45056, 0, 8976, 45056, 0, 8976, 45056, 0, 8976, 45056, 0, 8992, 45056, 0, 8992, 45056, 0, 8992, 45056, 0, 10624, 1, 0, 10640, 1, 0, 10656, 1, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 13568, 16680, 0, 13568, 16680, 0, 13568, 16680, 0, 13568, 16680, 0, 13184, 45059, 0, 13184, 45059, 0, 13184, 45059, 0, 13184, 45059, 0, 13184, 45059, 0, 576, 17, 0, 576, 17, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 2384, 16384, 0, 2400, 16384, 0, 2416, 16384, 0, 4560, 512, 0, 4576, 512, 0, 4592, 512, 0, 5824, 4161, 0, 5824, 4161, 0, 5824, 4161, 0, 5840, 4161, 0, 5840, 4161, 0, 5840, 4161, 0, 7296, 32769, 0, 7296, 32769, 0, 7312, 32769, 0, 7312, 32769, 0, 8000, 32769, 0, 8000, 32769, 0, 8016, 32769, 0, 8016, 32769, 0, 8960, 45056, 0, 8960, 45056, 0, 8960, 45056, 0, 8976, 45056, 0, 8976, 45056, 0, 8976, 45056, 0, 8992, 45056, 0, 8992, 45056, 0, 8992, 45056, 0, 10624, 1, 0, 10640, 1, 0, 10656, 1, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 11136, 65535, 0, 13568, 16680, 0, 13568, 16680, 0, 13568, 16680, 0, 13568, 16680, 
0, 13184, 45059, 0, 13184, 45059, 0, 13184, 45059, 0, 13184, 45059, 0, 13184, 45059, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576267056626476_289_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576267056626476_289_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3572d6c9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576267056626476_289_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,324 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 4))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((137 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((148 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = 
(counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((295 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + 
Stride: 4 + Fill: 0 + Size: 372 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1408, 8, 0, 5056, 2080, 0, 5056, 2080, 0, 6208, 49153, 0, 6208, 49153, 0, 6208, 49153, 0, 7632, 1, 0, 8788, 32771, 0, 8788, 32771, 0, 8788, 32771, 0, 8792, 32771, 0, 8792, 32771, 0, 8792, 32771, 0, 8796, 32771, 0, 8796, 32771, 0, 8796, 32771, 0, 9492, 49155, 0, 9492, 49155, 0, 9492, 49155, 0, 9492, 49155, 0, 9496, 49155, 0, 9496, 49155, 0, 9496, 49155, 0, 9496, 49155, 0, 9500, 49155, 0, 9500, 49155, 0, 9500, 49155, 0, 9500, 49155, 0, 10752, 8288, 0, 10752, 8288, 0, 10752, 8288, 0, 12816, 5460, 0, 12816, 5460, 0, 12816, 5460, 0, 12816, 5460, 0, 12816, 5460, 0, 12816, 5460, 0, 12832, 5460, 0, 12832, 5460, 0, 12832, 5460, 0, 12832, 5460, 0, 12832, 5460, 0, 12832, 5460, 0, 14288, 2088, 0, 14288, 2088, 0, 14288, 2088, 0, 14304, 2088, 0, 14304, 2088, 0, 14304, 2088, 0, 15504, 8704, 0, 15504, 8704, 0, 15520, 8704, 0, 15520, 8704, 0, 16128, 73, 0, 16128, 73, 0, 16128, 73, 0, 16704, 1040, 0, 16704, 1040, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1408, 8, 0, 5056, 2080, 0, 5056, 2080, 0, 6208, 49153, 0, 6208, 49153, 0, 6208, 49153, 0, 7632, 1, 0, 8788, 32771, 0, 8788, 32771, 0, 8788, 32771, 0, 8792, 32771, 0, 8792, 32771, 0, 8792, 32771, 0, 8796, 32771, 0, 8796, 32771, 0, 8796, 32771, 0, 9492, 49155, 0, 9492, 49155, 0, 9492, 49155, 0, 9492, 49155, 0, 9496, 49155, 0, 9496, 49155, 0, 9496, 49155, 0, 9496, 49155, 0, 9500, 49155, 0, 9500, 49155, 0, 9500, 49155, 0, 9500, 49155, 0, 10752, 8288, 0, 10752, 8288, 0, 10752, 8288, 0, 12816, 5460, 0, 12816, 5460, 0, 12816, 5460, 0, 12816, 5460, 0, 12816, 5460, 0, 12816, 5460, 0, 12832, 5460, 0, 12832, 5460, 0, 12832, 5460, 0, 12832, 5460, 0, 12832, 5460, 0, 12832, 5460, 0, 14288, 2088, 0, 14288, 2088, 0, 14288, 2088, 0, 14304, 2088, 0, 14304, 2088, 0, 14304, 2088, 0, 15504, 8704, 0, 15504, 8704, 0, 15520, 8704, 0, 15520, 8704, 0, 16128, 73, 0, 16128, 73, 0, 16128, 73, 0, 16704, 
1040, 0, 16704, 1040, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576270908742634_290_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576270908742634_290_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bf2fbf9c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576270908742634_290_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,232 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5184, 1, 0, 8448, 8192, 0, 9408, 12800, 0, 9408, 12800, 0, 9408, 12800, 0, 9424, 12800, 0, 9424, 12800, 0, 9424, 12800, 0, 9984, 3, 0, 9984, 3, 0, 10000, 3, 0, 10000, 3, 0, 10432, 3, 0, 10432, 3, 0, 11072, 65, 0, 11072, 65, 0, 12432, 1024, 0, 12448, 1024, 0, 12464, 1024, 0, 12736, 16676, 0, 12736, 16676, 0, 12736, 16676, 0, 12736, 16676, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 
65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 5184, 1, 0, 8448, 8192, 0, 9408, 12800, 0, 9408, 12800, 0, 9408, 12800, 0, 9424, 12800, 0, 9424, 12800, 0, 9424, 12800, 0, 9984, 3, 0, 9984, 3, 0, 10000, 3, 0, 10000, 3, 0, 10432, 3, 0, 10432, 3, 0, 11072, 65, 0, 11072, 65, 0, 12432, 1024, 0, 12448, 1024, 0, 12464, 1024, 0, 12736, 16676, 0, 12736, 16676, 0, 12736, 16676, 0, 12736, 16676, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0, 13184, 65535, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576275666958875_292_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576275666958875_292_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..efe337c9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576275666958875_292_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,135 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 2560, 85, 0, 2560, 85, 0, 2560, 85, 0, 2560, 85, 0, 576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 2560, 85, 0, 2560, 85, 0, 2560, 85, 0, 2560, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576275863857150_293_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576275863857150_293_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..48ccbff9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576275863857150_293_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,416 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 13)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((249 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((264 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((310 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (325 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch 
for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 168 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 33288, 0, 1024, 33288, 0, 1024, 33288, 0, 1664, 8, 0, 3392, 1040, 0, 3392, 1040, 0, 3712, 18724, 0, 3712, 18724, 0, 3712, 18724, 0, 3712, 18724, 0, 3712, 18724, 0, 12224, 1, 0, 12800, 16, 0, 13120, 272, 0, 13120, 272, 0, 13824, 17, 0, 13824, 17, 0, 17600, 2, 0, 19856, 512, 0, 19872, 512, 0, 20352, 17476, 0, 20352, 17476, 0, 20352, 17476, 0, 20352, 17476, 0, 20800, 34952, 0, 20800, 34952, 0, 20800, 34952, 0, 20800, 34952, 0, 1024, 33288, 0, 1024, 33288, 0, 1024, 33288, 0, 1664, 8, 0, 3392, 1040, 0, 3392, 1040, 0, 3712, 18724, 0, 3712, 18724, 0, 3712, 18724, 0, 3712, 18724, 0, 3712, 18724, 0, 12224, 1, 0, 12800, 16, 0, 13120, 272, 0, 13120, 272, 0, 13824, 17, 0, 13824, 17, 0, 17600, 2, 0, 19856, 512, 0, 19872, 512, 0, 20352, 17476, 0, 20352, 17476, 0, 20352, 17476, 0, 20352, 17476, 0, 20800, 34952, 0, 20800, 34952, 0, 20800, 34952, 0, 20800, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576279141475504_294_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576279141475504_294_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3b4e8407 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576279141475504_294_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,280 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 6))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 4))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((110 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((129 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
case 2: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } else { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # 
Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3328, 4112, 0, 3328, 4112, 0, 3968, 127, 0, 3968, 127, 0, 3968, 127, 0, 3968, 127, 0, 3968, 127, 0, 3968, 127, 0, 3968, 127, 0, 5328, 1, 0, 5344, 1, 0, 8976, 1, 0, 8992, 1, 0, 12544, 64, 0, 14080, 64, 0, 15232, 136, 0, 15232, 136, 0, 15872, 85, 0, 15872, 85, 0, 15872, 85, 0, 15872, 85, 0, 3328, 4112, 0, 3328, 4112, 0, 3968, 127, 0, 3968, 127, 0, 3968, 127, 0, 3968, 127, 0, 3968, 127, 0, 3968, 127, 0, 3968, 127, 0, 5328, 1, 0, 5344, 1, 0, 8976, 1, 0, 8992, 1, 0, 12544, 64, 0, 14080, 64, 0, 15232, 136, 0, 15232, 136, 0, 15872, 85, 0, 15872, 85, 0, 15872, 85, 0, 15872, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576288798846718_296_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576288798846718_296_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9d90d2a4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576288798846718_296_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,161 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((93 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 5060, 34816, 0, 5060, 34816, 0, 5064, 34816, 0, 5064, 34816, 0, 5068, 34816, 0, 5068, 34816, 0, 5076, 34816, 0, 5076, 34816, 0, 5080, 34816, 0, 5080, 34816, 0, 5084, 34816, 0, 5084, 34816, 0, 576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 5060, 34816, 0, 5060, 34816, 0, 5064, 34816, 0, 5064, 34816, 0, 5068, 34816, 0, 5068, 34816, 0, 5076, 34816, 0, 5076, 34816, 0, 5080, 34816, 0, 5080, 34816, 0, 5084, 34816, 0, 5084, 34816, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576289657814297_297_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576289657814297_297_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cf3b527a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576289657814297_297_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,121 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 276 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2244, 32770, 0, 2244, 32770, 0, 2248, 32770, 0, 2248, 32770, 0, 2252, 32770, 0, 2252, 32770, 0, 2260, 32770, 0, 2260, 32770, 0, 2264, 32770, 0, 2264, 32770, 0, 2268, 32770, 0, 2268, 32770, 0, 2276, 32770, 0, 2276, 32770, 0, 2280, 32770, 0, 2280, 32770, 0, 2284, 32770, 0, 2284, 32770, 0, 3716, 32770, 0, 3716, 32770, 0, 3720, 32770, 0, 3720, 32770, 0, 3724, 32770, 0, 3724, 32770, 0, 3732, 32770, 0, 3732, 32770, 0, 3736, 32770, 0, 3736, 32770, 0, 3740, 32770, 0, 3740, 32770, 0, 3748, 32770, 0, 3748, 32770, 0, 3752, 32770, 0, 3752, 32770, 0, 3756, 32770, 0, 3756, 32770, 0, 4416, 32770, 0, 4416, 32770, 0, 4432, 32770, 0, 4432, 32770, 0, 4448, 32770, 0, 4448, 32770, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2244, 32770, 0, 2244, 32770, 0, 2248, 32770, 0, 2248, 32770, 0, 2252, 32770, 0, 2252, 
32770, 0, 2260, 32770, 0, 2260, 32770, 0, 2264, 32770, 0, 2264, 32770, 0, 2268, 32770, 0, 2268, 32770, 0, 2276, 32770, 0, 2276, 32770, 0, 2280, 32770, 0, 2280, 32770, 0, 2284, 32770, 0, 2284, 32770, 0, 3716, 32770, 0, 3716, 32770, 0, 3720, 32770, 0, 3720, 32770, 0, 3724, 32770, 0, 3724, 32770, 0, 3732, 32770, 0, 3732, 32770, 0, 3736, 32770, 0, 3736, 32770, 0, 3740, 32770, 0, 3740, 32770, 0, 3748, 32770, 0, 3748, 32770, 0, 3752, 32770, 0, 3752, 32770, 0, 3756, 32770, 0, 3756, 32770, 0, 4416, 32770, 0, 4416, 32770, 0, 4432, 32770, 0, 4432, 32770, 0, 4448, 32770, 0, 4448, 32770, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576291984088522_298_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576291984088522_298_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1f9142b8 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576291984088522_298_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,208 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() == 3)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((116 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 
4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 7440, 18432, 0, 7440, 18432, 0, 7456, 18432, 0, 7456, 18432, 0, 7472, 18432, 0, 7472, 18432, 0, 9364, 4, 0, 9368, 4, 0, 9372, 4, 0, 9380, 4, 0, 9384, 4, 0, 9388, 4, 0, 9396, 4, 0, 9400, 4, 0, 9404, 4, 0, 9808, 18432, 0, 9808, 18432, 0, 9824, 18432, 0, 9824, 18432, 0, 9840, 18432, 0, 9840, 18432, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 7440, 18432, 0, 7440, 18432, 0, 7456, 18432, 0, 7456, 18432, 0, 7472, 18432, 0, 7472, 18432, 0, 9364, 4, 0, 9368, 4, 0, 9372, 4, 0, 9380, 4, 0, 9384, 4, 0, 9388, 4, 0, 9396, 4, 0, 9400, 4, 0, 9404, 4, 0, 9808, 18432, 0, 9808, 18432, 0, 9824, 18432, 0, 9824, 18432, 0, 9840, 18432, 0, 9840, 18432, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576304175077297_300_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576304175077297_300_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d2a3dc65 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576304175077297_300_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,189 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((75 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((118 << 6) | (i0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 228 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2368, 85, 0, 2368, 85, 0, 2368, 85, 0, 2368, 85, 0, 3584, 32770, 0, 3584, 32770, 0, 3600, 32770, 0, 3600, 32770, 0, 3616, 32770, 0, 3616, 32770, 0, 4804, 128, 0, 4808, 128, 0, 4812, 128, 0, 4820, 128, 0, 4824, 128, 0, 4828, 128, 0, 4836, 128, 0, 4840, 128, 0, 4844, 128, 0, 9216, 32778, 0, 9216, 32778, 0, 9216, 32778, 0, 9232, 32778, 0, 9232, 32778, 0, 9232, 32778, 0, 9248, 32778, 0, 9248, 32778, 0, 9248, 32778, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 
18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2368, 85, 0, 2368, 85, 0, 2368, 85, 0, 2368, 85, 0, 3584, 32770, 0, 3584, 32770, 0, 3600, 32770, 0, 3600, 32770, 0, 3616, 32770, 0, 3616, 32770, 0, 4804, 128, 0, 4808, 128, 0, 4812, 128, 0, 4820, 128, 0, 4824, 128, 0, 4828, 128, 0, 4836, 128, 0, 4840, 128, 0, 4844, 128, 0, 9216, 32778, 0, 9216, 32778, 0, 9216, 32778, 0, 9232, 32778, 0, 9232, 32778, 0, 9232, 32778, 0, 9248, 32778, 0, 9248, 32778, 0, 9248, 32778, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576305755866737_301_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576305755866737_301_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f057a294 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576305755866737_301_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576310797336503_304_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576310797336503_304_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dfd080e5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576310797336503_304_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,255 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + 
uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((162 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((176 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((185 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((196 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 
== 1)) { + continue; + } + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((256 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((267 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((278 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + 
Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 4369, 0, 1216, 4369, 0, 1216, 4369, 0, 1216, 4369, 0, 1232, 4369, 0, 1232, 4369, 0, 1232, 4369, 0, 1232, 4369, 0, 1248, 4369, 0, 1248, 4369, 0, 1248, 4369, 0, 1248, 4369, 0, 6980, 512, 0, 6984, 512, 0, 6996, 512, 0, 7000, 512, 0, 7872, 32, 0, 12560, 64, 0, 12564, 64, 0, 12568, 64, 0, 12576, 64, 0, 12580, 64, 0, 12584, 64, 0, 16384, 32768, 0, 16388, 32768, 0, 16400, 32768, 0, 16404, 32768, 0, 1216, 4369, 0, 1216, 4369, 0, 1216, 4369, 0, 1216, 4369, 0, 1232, 4369, 0, 1232, 4369, 0, 1232, 4369, 0, 1232, 4369, 0, 1248, 4369, 0, 1248, 4369, 0, 1248, 4369, 0, 1248, 4369, 0, 6980, 512, 0, 6984, 512, 0, 6996, 512, 0, 7000, 512, 0, 7872, 32, 0, 12560, 64, 0, 12564, 64, 0, 12568, 64, 0, 12576, 64, 0, 12580, 64, 0, 12584, 64, 0, 16384, 32768, 0, 16388, 32768, 0, 16400, 32768, 0, 16404, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576323428183696_305_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576323428183696_305_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87cd25f4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576323428183696_305_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576323527494784_306_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576323527494784_306_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1e896939 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576323527494784_306_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2880, 49159, 0, 2880, 49159, 0, 2880, 49159, 0, 2880, 49159, 0, 2880, 49159, 0, 2240, 10920, 0, 2240, 10920, 0, 2240, 10920, 0, 2240, 10920, 0, 2240, 10920, 0, 2240, 10920, 0, 1984, 1024, 0, 2880, 49159, 0, 2880, 49159, 0, 2880, 49159, 0, 2880, 49159, 0, 2880, 49159, 0, 2240, 10920, 0, 2240, 10920, 0, 2240, 10920, 0, 2240, 10920, 0, 2240, 10920, 0, 2240, 10920, 0, 1984, 1024, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576348574223647_308_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576348574223647_308_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e7b04953 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576348574223647_308_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,411 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Record log: each lane that runs a wave op appends 3 uints (tag word, ballot.x, ballot.y). */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the next free index into _participant_bit; InterlockedAdd bumps it by 3 per record. */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 
1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 11))) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((251 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (((261 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((272 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((283 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter4 == 2)) { + break; + } + } + if ((counter3 == 1)) { + break; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (308 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((325 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((339 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((346 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((369 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((376 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((401 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (405 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2240, 2048, 0, 10432, 21840, 0, 10432, 21840, 0, 10432, 21840, 0, 10432, 21840, 0, 10432, 21840, 0, 10432, 21840, 0, 10448, 21840, 0, 10448, 21840, 0, 10448, 21840, 0, 10448, 21840, 0, 10448, 21840, 0, 10448, 21840, 0, 10464, 21840, 0, 10464, 21840, 0, 10464, 21840, 0, 10464, 21840, 0, 10464, 21840, 0, 10464, 21840, 0, 14928, 11288, 0, 14928, 11288, 0, 14928, 11288, 0, 14928, 11288, 0, 14928, 11288, 0, 16084, 8, 0, 16088, 8, 0, 17428, 8192, 0, 17432, 8192, 0, 18132, 8192, 0, 18136, 8192, 0, 19136, 73, 0, 19136, 73, 0, 19136, 73, 0, 19712, 1040, 0, 19712, 1040, 0, 25680, 2048, 0, 25696, 2048, 0, 2240, 2048, 0, 10432, 21840, 0, 10432, 21840, 0, 10432, 21840, 0, 10432, 21840, 0, 10432, 21840, 0, 10432, 21840, 0, 10448, 21840, 0, 10448, 21840, 0, 10448, 21840, 0, 10448, 21840, 0, 10448, 21840, 0, 10448, 21840, 0, 10464, 21840, 0, 10464, 21840, 0, 10464, 21840, 0, 10464, 21840, 0, 10464, 21840, 0, 10464, 21840, 0, 14928, 11288, 0, 14928, 11288, 0, 14928, 11288, 0, 14928, 11288, 0, 14928, 11288, 0, 16084, 8, 0, 16088, 8, 0, 17428, 8192, 0, 17432, 8192, 0, 18132, 8192, 0, 18136, 8192, 0, 19136, 73, 0, 19136, 73, 0, 19136, 73, 0, 19712, 
1040, 0, 19712, 1040, 0, 25680, 2048, 0, 25696, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576363115485087_310_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576363115485087_310_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e997da55 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576363115485087_310_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,234 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Record log: each lane that runs a wave op appends 3 uints (tag word, ballot.x, ballot.y). */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the next free index into _participant_bit; InterlockedAdd bumps it by 3 per record. */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() >= 13)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((152 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((163 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 336 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1856, 45057, 0, 1856, 45057, 0, 1856, 45057, 0, 1856, 45057, 0, 2496, 1, 0, 3392, 9216, 0, 3392, 9216, 0, 3840, 41984, 0, 3840, 41984, 0, 3840, 41984, 0, 5200, 64, 0, 9744, 43008, 0, 9744, 43008, 0, 9744, 43008, 0, 9748, 43008, 0, 9748, 43008, 0, 9748, 43008, 0, 9752, 43008, 0, 9752, 43008, 0, 9752, 43008, 0, 9760, 43008, 0, 9760, 43008, 0, 9760, 43008, 0, 9764, 43008, 0, 9764, 43008, 0, 9764, 43008, 0, 9768, 43008, 0, 9768, 43008, 0, 9768, 43008, 0, 10448, 43010, 0, 10448, 43010, 0, 10448, 43010, 0, 10448, 43010, 0, 10452, 43010, 0, 10452, 43010, 0, 10452, 43010, 0, 10452, 43010, 0, 10456, 43010, 0, 10456, 43010, 0, 10456, 43010, 0, 10456, 43010, 0, 10464, 43010, 0, 10464, 43010, 0, 10464, 43010, 0, 10464, 43010, 0, 10468, 43010, 0, 10468, 43010, 0, 10468, 43010, 0, 10468, 43010, 0, 10472, 43010, 0, 10472, 43010, 0, 10472, 43010, 0, 10472, 43010, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1856, 45057, 0, 1856, 45057, 0, 1856, 45057, 0, 1856, 45057, 0, 2496, 1, 0, 3392, 9216, 0, 3392, 9216, 0, 3840, 
41984, 0, 3840, 41984, 0, 3840, 41984, 0, 5200, 64, 0, 9744, 43008, 0, 9744, 43008, 0, 9744, 43008, 0, 9748, 43008, 0, 9748, 43008, 0, 9748, 43008, 0, 9752, 43008, 0, 9752, 43008, 0, 9752, 43008, 0, 9760, 43008, 0, 9760, 43008, 0, 9760, 43008, 0, 9764, 43008, 0, 9764, 43008, 0, 9764, 43008, 0, 9768, 43008, 0, 9768, 43008, 0, 9768, 43008, 0, 10448, 43010, 0, 10448, 43010, 0, 10448, 43010, 0, 10448, 43010, 0, 10452, 43010, 0, 10452, 43010, 0, 10452, 43010, 0, 10452, 43010, 0, 10456, 43010, 0, 10456, 43010, 0, 10456, 43010, 0, 10456, 43010, 0, 10464, 43010, 0, 10464, 43010, 0, 10464, 43010, 0, 10464, 43010, 0, 10468, 43010, 0, 10468, 43010, 0, 10468, 43010, 0, 10468, 43010, 0, 10472, 43010, 0, 10472, 43010, 0, 10472, 43010, 0, 10472, 43010, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576365869875702_311_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576365869875702_311_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..29a8fcde --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576365869875702_311_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,243 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Record log: each lane that runs a wave op appends 3 uints (tag word, ballot.x, ballot.y). */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the next free index into _participant_bit; InterlockedAdd bumps it by 3 per record. */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if ((WaveGetLaneIndex() >= 12)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if 
(true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 14)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 4496, 32776, 0, 4496, 32776, 0, 4512, 32776, 0, 4512, 32776, 0, 4528, 32776, 0, 4528, 32776, 0, 5200, 32768, 0, 5216, 32768, 0, 5232, 32768, 0, 7168, 128, 0, 7184, 128, 0, 7200, 128, 0, 7488, 2048, 0, 8128, 32768, 0, 9472, 32768, 0, 10240, 32768, 0, 11136, 16384, 0, 576, 17, 0, 576, 17, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 4496, 32776, 0, 4496, 32776, 0, 4512, 32776, 0, 4512, 32776, 0, 4528, 32776, 0, 4528, 32776, 0, 5200, 32768, 0, 5216, 32768, 0, 5232, 32768, 0, 7168, 128, 0, 7184, 128, 0, 7200, 128, 0, 7488, 2048, 0, 8128, 32768, 0, 9472, 32768, 0, 10240, 32768, 0, 11136, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize16BitTracking/tests/program_1756576366497282180_312_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576366497282180_312_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..550bd6aa
--- /dev/null
+++ b/test/WaveSize16BitTracking/tests/program_1756576366497282180_312_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,195 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Record log: three uints per record (site tag, ballot.x, ballot.y).
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the shared write cursor into _participant_bit.
+
+[numthreads(32, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // Every lane that reaches a wave-op site appends one record.
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 3)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve three slots; temp receives the cursor value before the add.
+        _participant_bit[temp] = (9 << 6); // Site tag: the site id sits in bits 6 and up.
+        uint4 ballot = WaveActiveBallot(1); // Bit mask of the lanes active at this site.
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (18 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 2: {
+      if (true) {
+        result = (result + WaveActiveSum(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (23 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+  }
+  switch ((WaveGetLaneIndex() % 3)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (33 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15))) {
+        if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { // Never true: the enclosing branch only admits lanes 4 and 15.
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (55 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        switch ((WaveGetLaneIndex() % 4)) {
+          case 0: {
+            if ((WaveGetLaneIndex() < 8)) {
+              result = (result + WaveActiveSum(1));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (65 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+          case 1: {
+            if (((WaveGetLaneIndex() % 2) == 0)) {
+              result = (result + WaveActiveSum(2));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (74 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+          case 2: {
+            if (true) {
+              result = (result + WaveActiveSum(3));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (79 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+          case 3: {
+            if ((WaveGetLaneIndex() < 20)) {
+              result = (result + WaveActiveSum(4));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (86 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+        }
+      }
+      break;
+    }
+    case 2: {
+      if (true) {
+        result = (result + WaveActiveSum(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (91 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    default: { // Unreachable: a value modulo 3 is always 0, 1 or 2.
+      result = (result + WaveActiveSum(99));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (95 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 32 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 114
+  - Name: expected_bit_patterns # Holds the same 19 records twice (presumably one set per 16-lane wave; confirm).
+    Format: UInt32
+    Stride: 4
+    Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2112, 73, 0, 2112, 73, 0, 2112, 73, 0, 4160, 16, 0, 5824, 18724, 0, 5824, 18724, 0, 5824, 18724, 0, 5824, 18724, 0, 5824, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2112, 73, 0, 2112, 73, 0, 2112, 73, 0, 4160, 16, 0, 5824, 18724, 0, 5824, 18724, 0, 5824, 18724, 0, 5824, 18724, 0, 5824, 18724, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3 # One record = 3 uint32 values (site tag, ballot.x, ballot.y)
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize16BitTracking/tests/program_1756576367328372285_314_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576367328372285_314_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..dea1ebb4
--- /dev/null
+++ b/test/WaveSize16BitTracking/tests/program_1756576367328372285_314_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,227 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Record log: three uints per record (site tag, ballot.x, ballot.y).
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the shared write cursor into _participant_bit.
+
+[numthreads(32, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // Every lane that reaches a wave-op site appends one record.
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      uint counter0 = 0;
+      while ((counter0 < 2)) {
+        counter0 = (counter0 + 1);
+        uint counter1 = 0;
+        while ((counter1 < 2)) {
+          counter1 = (counter1 + 1);
+          if (((WaveGetLaneIndex() & 1) == 1)) { // Never true: this case only runs on even lanes.
+            result = (result + WaveActiveMax(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve three slots; temp receives the cursor value before the add.
+            _participant_bit[temp] = (((25 << 6) | (counter0 << 4)) | (counter1 << 2)); // Tag: site id from bit 6, counter0 from bit 4, counter1 from bit 2.
+            uint4 ballot = WaveActiveBallot(1); // Bit mask of the lanes active at this site.
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if (((WaveGetLaneIndex() & 1) == 0)) {
+            result = (result + WaveActiveSum(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((34 << 6) | (counter0 << 4)) | (counter1 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((counter0 == 1)) { // Leaves the outer loop after its first pass.
+          break;
+        }
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) { // Never true: this case only runs on odd lanes.
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (46 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+  }
+  switch ((WaveGetLaneIndex() % 4)) {
+    case 0: {
+      if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) {
+        if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) {
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (67 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) {
+          if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) {
+            result = (result + WaveActiveMin(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (85 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) {
+            result = (result + WaveActiveMax((WaveGetLaneIndex() + 5)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (98 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+      } else {
+        if ((WaveGetLaneIndex() == 3)) { // Never true: this case only runs on lanes divisible by 4.
+          result = (result + WaveActiveMax(5));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (105 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        uint counter2 = 0;
+        while ((counter2 < 3)) {
+          counter2 = (counter2 + 1);
+          if ((WaveGetLaneIndex() >= 14)) {
+            result = (result + WaveActiveMin((WaveGetLaneIndex() + 3)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((121 << 6) | (counter2 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((WaveGetLaneIndex() == 14)) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (128 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    case 1: { // No break at the end of this case: control falls through into case 2.
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (137 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    case 2: { // No break at the end of this case: control falls through into case 3.
+      if (true) {
+        result = (result + WaveActiveSum(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (142 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    case 3: {
+      if ((WaveGetLaneIndex() < 20)) {
+        result = (result + WaveActiveSum(4));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (149 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+  }
+  uint counter3 = 0;
+  while ((counter3 < 2)) {
+    counter3 = (counter3 + 1);
+    if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) {
+      result = (result + WaveActiveSum(5));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = ((167 << 6) | (counter3 << 4));
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) {
+      result = (result + WaveActiveMin(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = ((178 << 6) | (counter3 << 4));
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 32 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 420
+  - Name: expected_bit_patterns # Holds the same 70 records twice (presumably one set per 16-lane wave; confirm).
+    Format: UInt32
+    Stride: 4
+    Data: [2196, 21845, 0, 2196, 21845, 0, 2196, 21845, 0, 2196, 21845, 0, 2196, 21845, 0, 2196, 21845, 0, 2196, 21845, 0, 2196, 21845, 0, 2200, 21845, 0, 2200, 21845, 0, 2200, 21845, 0, 2200, 21845, 0, 2200, 21845, 0, 2200, 21845, 0, 2200, 21845, 0, 2200, 21845, 0, 4288, 1, 0, 5440, 1, 0, 6272, 4097, 0, 6272, 4097, 0, 9088, 26214, 0, 9088, 26214, 0, 9088, 26214, 0, 9088, 26214, 0, 9088, 26214, 0, 9088, 26214, 0, 9088, 26214, 0, 9088, 26214, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 10704, 63491, 0, 10704, 63491, 0, 10704, 63491, 0, 10704, 63491, 0, 10704, 63491, 0, 10704, 63491, 0, 10704, 63491, 0, 10720, 63491, 0, 10720, 63491, 0, 10720, 63491, 0, 10720, 63491, 0, 10720, 63491, 0, 10720, 63491, 0, 10720, 63491, 0, 11408, 64515, 0, 11408, 64515, 0, 11408, 64515, 0, 11408, 64515, 0, 11408, 64515, 0, 11408, 64515, 0, 11408, 64515, 0, 11408, 64515, 0, 11424, 64515, 0, 11424, 64515, 0, 11424, 64515, 0, 11424, 64515, 0, 11424, 64515, 0, 11424, 64515, 0, 11424, 64515, 0, 11424, 64515, 0, 2196, 21845, 0, 2196, 21845, 0, 2196, 21845, 0, 2196, 21845, 0, 2196, 21845, 0, 2196, 21845, 0, 2196, 21845, 0, 2196, 21845, 0, 2200, 21845, 0, 2200, 21845, 0, 2200, 21845, 0, 2200, 21845, 0, 2200, 21845, 0, 2200, 21845, 0, 2200, 21845, 0, 2200, 21845, 0, 4288, 1, 0, 5440, 1, 0, 6272, 4097, 0, 6272, 4097, 0, 9088, 26214, 0, 9088, 26214, 0, 9088, 26214, 0, 9088, 26214, 0, 9088, 26214, 0, 9088, 26214, 0, 9088, 26214, 0, 9088, 26214, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 9536, 61166, 0, 10704, 63491, 0, 10704, 63491, 0, 10704, 63491, 0, 10704, 63491, 0, 10704, 63491, 0, 10704, 63491, 0, 10704, 63491, 0, 10720, 63491, 0, 10720, 63491, 0, 10720, 63491, 0, 10720, 63491, 0, 10720, 63491, 0, 10720, 63491, 0, 10720, 63491, 0, 11408, 64515, 0, 11408, 64515, 0, 11408, 64515, 0, 11408, 64515, 0, 11408, 64515, 0, 11408, 64515, 0, 11408, 64515, 0, 11408, 64515, 0, 11424, 64515, 0, 11424, 64515, 0, 11424, 64515, 0, 11424, 64515, 0, 11424, 64515, 0, 11424, 64515, 0, 11424, 64515, 0, 11424, 64515, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3 # One record = 3 uint32 values (site tag, ballot.x, ballot.y)
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize16BitTracking/tests/program_1756576369113696648_315_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576369113696648_315_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..9e8545b5
--- /dev/null
+++ b/test/WaveSize16BitTracking/tests/program_1756576369113696648_315_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,233 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Record log: three uints per record (site tag, ballot.x, ballot.y).
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the shared write cursor into _participant_bit.
+
+[numthreads(32, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // Every lane that reaches a wave-op site appends one record.
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      switch ((WaveGetLaneIndex() % 3)) {
+        case 0: {
+          if ((WaveGetLaneIndex() < 8)) {
+            result = (result + WaveActiveSum(1));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve three slots; temp receives the cursor value before the add.
+            _participant_bit[temp] = (12 << 6); // Site tag: the site id sits in bits 6 and up.
+            uint4 ballot = WaveActiveBallot(1); // Bit mask of the lanes active at this site.
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 1: {
+          for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) {
+            if ((WaveGetLaneIndex() == 8)) { // Never true: lane 8 has remainder 2 modulo 3, so it does not reach this case.
+              result = (result + WaveActiveMax(9));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = ((27 << 6) | (i0 << 4)); // Tag: site id from bit 6, loop index from bit 4.
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            if ((i0 == 1)) { // Last statement of the loop body, so the continue changes nothing.
+              continue;
+            }
+          }
+          break;
+        }
+        case 2: {
+          if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15))) {
+            if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) {
+              result = (result + WaveActiveMin(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (56 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) {
+              result = (result + WaveActiveMax((WaveGetLaneIndex() + 1)));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (73 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          } else {
+            if (((WaveGetLaneIndex() & 1) == 0)) {
+              result = (result + WaveActiveMin((WaveGetLaneIndex() + 1)));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (84 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            if (((WaveGetLaneIndex() & 1) == 0)) {
+              result = (result + WaveActiveMax(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (93 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          break;
+        }
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) { // Never true: this case only runs on odd lanes.
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (102 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    default: { // Unreachable: a value modulo 2 is always 0 or 1.
+      result = (result + WaveActiveSum(99));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (106 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+      break;
+    }
+  }
+  switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (116 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (125 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+  }
+  switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) {
+        if ((WaveGetLaneIndex() == 8)) {
+          if ((WaveGetLaneIndex() == 15)) { // Never true: the enclosing branch only admits lane 8.
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((146 << 6) | (i1 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        } else {
+          if ((WaveGetLaneIndex() < 8)) {
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((153 << 6) | (i1 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (162 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    default: {
+      result = (result + WaveActiveSum(99));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (166 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 32 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 144
+  - Name: expected_bit_patterns # Holds the same 24 records twice (presumably one set per 16-lane wave; confirm).
+    Format: UInt32
+    Stride: 4
+    Data: [768, 65, 0, 768, 65, 0, 5376, 16644, 0, 5376, 16644, 0, 5376, 16644, 0, 5952, 16644, 0, 5952, 16644, 0, 5952, 16644, 0, 7424, 85, 0, 7424, 85, 0, 7424, 85, 0, 7424, 85, 0, 9792, 85, 0, 9792, 85, 0, 9792, 85, 0, 9792, 85, 0, 9808, 85, 0, 9808, 85, 0, 9808, 85, 0, 9808, 85, 0, 9824, 85, 0, 9824, 85, 0, 9824, 85, 0, 9824, 85, 0, 768, 65, 0, 768, 65, 0, 5376, 16644, 0, 5376, 16644, 0, 5376, 16644, 0, 5952, 16644, 0, 5952, 16644, 0, 5952, 16644, 0, 7424, 85, 0, 7424, 85, 0, 7424, 85, 0, 7424, 85, 0, 9792, 85, 0, 9792, 85, 0, 9792, 85, 0, 9792, 85, 0, 9808, 85, 0, 9808, 85, 0, 9808, 85, 0, 9808, 85, 0, 9824, 85, 0, 9824, 85, 0, 9824, 85, 0, 9824, 85, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3 # One record = 3 uint32 values (site tag, ballot.x, ballot.y)
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576370334389832_316_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576370334389832_316_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..10f57dd5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576370334389832_316_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,395 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 14))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i0 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((231 << 6) | (i1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((269 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (304 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (335 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (346 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (360 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((391 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((425 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((440 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((449 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (456 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 228 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1984, 8193, 0, 1984, 8193, 0, 8576, 1, 0, 10368, 1, 0, 13824, 256, 0, 14784, 51, 0, 14784, 51, 0, 14784, 51, 0, 14784, 51, 0, 14800, 51, 0, 14800, 51, 0, 14800, 51, 0, 14800, 51, 0, 14816, 51, 0, 14816, 51, 0, 14816, 51, 0, 14816, 51, 0, 15872, 17476, 0, 15872, 17476, 0, 15872, 17476, 0, 15872, 17476, 0, 17232, 32776, 0, 17232, 32776, 0, 17248, 32776, 0, 17248, 32776, 0, 17264, 32776, 0, 17264, 32776, 0, 17792, 1024, 0, 20288, 1, 0, 22464, 256, 0, 25024, 1024, 0, 25040, 1024, 0, 27200, 4, 0, 27216, 4, 0, 29184, 34952, 0, 29184, 34952, 0, 29184, 34952, 0, 29184, 34952, 0, 1984, 8193, 0, 1984, 8193, 0, 8576, 1, 0, 10368, 1, 0, 13824, 256, 0, 14784, 51, 0, 14784, 51, 0, 14784, 51, 0, 14784, 51, 0, 14800, 51, 0, 14800, 51, 0, 14800, 51, 0, 14800, 51, 0, 14816, 51, 0, 14816, 51, 0, 14816, 51, 0, 14816, 51, 0, 15872, 17476, 0, 15872, 17476, 0, 15872, 17476, 0, 15872, 17476, 0, 17232, 32776, 0, 17232, 32776, 0, 17248, 32776, 0, 17248, 32776, 0, 17264, 32776, 0, 17264, 32776, 0, 17792, 1024, 0, 20288, 1, 0, 22464, 256, 0, 25024, 1024, 0, 25040, 1024, 0, 27200, 4, 0, 27216, 4, 0, 29184, 34952, 0, 29184, 34952, 0, 29184, 34952, 0, 29184, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576377227906371_317_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576377227906371_317_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4807e352 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576377227906371_317_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,112 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || 
(WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2256, 8194, 0, 2256, 8194, 0, 2260, 8194, 0, 2260, 8194, 0, 2272, 8194, 0, 2272, 8194, 0, 2276, 8194, 0, 2276, 8194, 0, 2288, 8194, 0, 2288, 8194, 0, 2292, 8194, 0, 2292, 8194, 0, 4224, 28086, 0, 4224, 28086, 0, 4224, 28086, 0, 4224, 28086, 0, 4224, 28086, 0, 4224, 28086, 0, 4224, 28086, 0, 4224, 28086, 0, 4224, 28086, 0, 4224, 28086, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 2256, 8194, 0, 2256, 8194, 0, 2260, 8194, 0, 2260, 8194, 0, 2272, 8194, 0, 2272, 8194, 0, 2276, 8194, 0, 2276, 8194, 0, 2288, 8194, 0, 2288, 8194, 0, 2292, 8194, 0, 2292, 8194, 0, 4224, 28086, 0, 4224, 28086, 0, 4224, 28086, 0, 4224, 28086, 0, 4224, 28086, 0, 4224, 28086, 0, 4224, 28086, 0, 4224, 28086, 0, 4224, 28086, 0, 4224, 28086, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576377731259251_318_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576377731259251_318_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5d5fa71d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576377731259251_318_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,124 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + 
} + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 4480, 32788, 0, 4480, 32788, 0, 4480, 32788, 0, 4096, 6176, 0, 4096, 6176, 0, 4096, 6176, 0, 3712, 64, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 4480, 32788, 0, 4480, 32788, 0, 4480, 32788, 0, 4096, 6176, 0, 4096, 6176, 0, 4096, 6176, 0, 3712, 64, 0] + - Name: _wave_op_index + Format: 
UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576377940845077_319_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576377940845077_319_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..968a9c27 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576377940845077_319_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,268 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 14)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((92 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((240 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4480, 17, 0, 4480, 17, 0, 5904, 4096, 0, 5908, 4096, 0, 5920, 4096, 0, 5924, 4096, 0, 7360, 17, 0, 7360, 17, 0, 15376, 4, 0, 15380, 4, 0, 15384, 4, 0, 17280, 4, 0, 17728, 34952, 0, 17728, 34952, 0, 17728, 34952, 0, 17728, 34952, 0, 4480, 17, 0, 4480, 17, 0, 5904, 4096, 0, 5908, 4096, 0, 5920, 4096, 0, 5924, 4096, 0, 7360, 17, 0, 7360, 17, 0, 15376, 4, 0, 15380, 4, 0, 15384, 4, 0, 17280, 4, 0, 17728, 34952, 0, 17728, 34952, 0, 17728, 34952, 0, 17728, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576393614886724_321_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576393614886724_321_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..14f1977f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576393614886724_321_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,292 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 10)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3200, 63491, 0, 3200, 63491, 0, 3200, 63491, 0, 3200, 63491, 0, 3200, 63491, 0, 3200, 63491, 0, 3200, 63491, 0, 2816, 4, 0, 2560, 1360, 0, 2560, 1360, 0, 2560, 1360, 0, 2560, 1360, 0, 4224, 1, 0, 5696, 1024, 0, 9472, 32768, 0, 9488, 32768, 0, 12800, 32777, 0, 12800, 32777, 0, 12800, 32777, 0, 12816, 32777, 0, 12816, 32777, 0, 12816, 32777, 0, 14144, 32777, 0, 14144, 32777, 0, 14144, 32777, 0, 14160, 32777, 0, 14160, 32777, 0, 14160, 32777, 0, 14912, 1040, 0, 14912, 1040, 0, 15232, 18724, 0, 15232, 18724, 0, 15232, 18724, 0, 15232, 18724, 0, 15232, 18724, 0, 3200, 63491, 0, 3200, 63491, 0, 3200, 63491, 0, 3200, 63491, 0, 3200, 63491, 0, 3200, 63491, 0, 3200, 63491, 0, 2816, 4, 0, 2560, 1360, 0, 2560, 1360, 0, 2560, 1360, 0, 2560, 1360, 0, 4224, 1, 0, 5696, 1024, 0, 9472, 32768, 0, 9488, 32768, 0, 12800, 32777, 0, 12800, 32777, 0, 12800, 32777, 0, 12816, 32777, 0, 12816, 32777, 0, 12816, 32777, 0, 14144, 32777, 0, 14144, 32777, 0, 14144, 32777, 0, 14160, 32777, 0, 14160, 32777, 0, 14160, 32777, 0, 14912, 1040, 0, 14912, 1040, 0, 15232, 18724, 0, 15232, 18724, 0, 15232, 18724, 0, 15232, 18724, 0, 15232, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576395282992171_322_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576395282992171_322_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4ff53ef0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576395282992171_322_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,90 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 32771, 0, 1216, 32771, 0, 1216, 32771, 0, 2128, 32768, 0, 2144, 32768, 0, 2576, 3, 0, 2576, 3, 0, 2592, 3, 0, 2592, 3, 0, 1216, 32771, 0, 1216, 32771, 0, 1216, 32771, 0, 2128, 32768, 0, 2144, 32768, 0, 2576, 3, 0, 2576, 3, 0, 2592, 3, 0, 2592, 3, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576395416131669_323_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576395416131669_323_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..504c1c58 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576395416131669_323_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,123 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 3264, 4168, 0, 3264, 4168, 0, 3264, 4168, 0, 2880, 59392, 0, 2880, 59392, 0, 2880, 59392, 0, 2880, 59392, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 3264, 4168, 0, 3264, 4168, 0, 3264, 4168, 0, 2880, 59392, 0, 2880, 59392, 0, 2880, 59392, 0, 2880, 59392, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576395627082287_324_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576395627082287_324_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..143a8ae4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576395627082287_324_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,168 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + if 
(((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = 
(result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 3328, 2, 0, 4032, 2, 0, 4480, 8192, 0, 4928, 8192, 0, 5248, 32, 0, 5568, 17476, 0, 5568, 17476, 0, 5568, 17476, 0, 5568, 17476, 0, 6016, 34952, 0, 6016, 34952, 0, 6016, 34952, 0, 6016, 34952, 0, 576, 17, 0, 576, 17, 0, 3328, 2, 0, 4032, 2, 0, 4480, 8192, 0, 4928, 8192, 0, 5248, 32, 0, 5568, 17476, 0, 5568, 17476, 0, 5568, 17476, 0, 5568, 17476, 0, 6016, 34952, 0, 6016, 34952, 0, 6016, 34952, 0, 6016, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576395822984346_325_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576395822984346_325_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..485ba9dd --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576395822984346_325_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,163 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 5, 0, 1344, 5, 0, 1360, 5, 0, 1360, 5, 0, 1376, 5, 0, 1376, 5, 0, 4352, 5185, 0, 4352, 5185, 0, 4352, 5185, 0, 4352, 5185, 0, 4368, 5185, 0, 4368, 5185, 0, 4368, 5185, 0, 4368, 5185, 0, 4384, 5185, 0, 4384, 5185, 0, 4384, 5185, 0, 4384, 5185, 0, 5056, 64, 0, 5072, 64, 0, 5088, 64, 0, 7360, 64, 0, 1344, 5, 0, 1344, 5, 0, 1360, 5, 0, 1360, 5, 0, 1376, 5, 0, 1376, 5, 0, 4352, 5185, 0, 4352, 5185, 0, 4352, 5185, 0, 4352, 5185, 0, 4368, 5185, 0, 4368, 5185, 0, 4368, 5185, 0, 4368, 5185, 0, 4384, 5185, 0, 4384, 5185, 0, 4384, 5185, 0, 4384, 5185, 0, 5056, 64, 0, 5072, 64, 0, 5088, 64, 0, 7360, 64, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576396484590281_326_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576396484590281_326_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ed2b1b57 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576396484590281_326_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,143 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2368, 73, 0, 2368, 73, 0, 2368, 73, 0, 2944, 1040, 0, 2944, 1040, 0, 3264, 18724, 0, 3264, 18724, 0, 3264, 18724, 0, 3264, 18724, 0, 3264, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2368, 73, 0, 2368, 73, 0, 2368, 73, 0, 2944, 1040, 0, 2944, 1040, 0, 3264, 18724, 0, 3264, 18724, 0, 3264, 18724, 0, 3264, 18724, 0, 3264, 18724, 0] + - Name: _wave_op_index + 
Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576403120610555_328_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576403120610555_328_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..92fef85b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576403120610555_328_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,146 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((45 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 4161, 0, 1216, 4161, 0, 1216, 4161, 0, 1232, 4161, 0, 1232, 4161, 0, 1232, 4161, 0, 3456, 4161, 0, 3456, 4161, 0, 3456, 4161, 0, 3472, 4161, 0, 3472, 4161, 0, 3472, 4161, 0, 7296, 2, 0, 7616, 18724, 0, 7616, 18724, 0, 7616, 18724, 0, 7616, 18724, 0, 7616, 18724, 0, 1216, 4161, 0, 1216, 4161, 0, 1216, 4161, 0, 1232, 4161, 0, 1232, 4161, 0, 1232, 4161, 0, 3456, 4161, 0, 3456, 4161, 0, 3456, 4161, 0, 3472, 4161, 0, 3472, 4161, 0, 3472, 4161, 0, 7296, 2, 0, 7616, 18724, 0, 7616, 18724, 0, 7616, 18724, 0, 7616, 18724, 0, 7616, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576403903365908_329_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576403903365908_329_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e20451e9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576403903365908_329_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,316 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2688, 1, 0, 6528, 73, 0, 6528, 73, 0, 6528, 73, 0, 7104, 1040, 0, 7104, 1040, 0, 7424, 18724, 0, 7424, 18724, 0, 7424, 18724, 0, 7424, 18724, 0, 7424, 18724, 0, 8064, 85, 0, 8064, 85, 0, 8064, 85, 0, 8064, 85, 0, 9664, 8, 0, 2688, 1, 0, 6528, 73, 0, 6528, 73, 0, 6528, 73, 0, 7104, 1040, 0, 7104, 1040, 0, 7424, 18724, 0, 7424, 18724, 0, 7424, 18724, 0, 7424, 18724, 0, 7424, 18724, 0, 8064, 85, 0, 8064, 85, 0, 8064, 85, 0, 8064, 85, 0, 9664, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + 
Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576404619847354_330_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576404619847354_330_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e74b2ba3 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576404619847354_330_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,122 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4864, 256, 0, 4608, 1, 0, 4352, 4224, 0, 4352, 4224, 0, 3968, 2084, 0, 3968, 2084, 0, 3968, 2084, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4864, 256, 0, 4608, 1, 0, 4352, 4224, 0, 4352, 4224, 0, 3968, 2084, 0, 3968, 2084, 0, 3968, 2084, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576410979127886_332_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576410979127886_332_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7c70f0c4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576410979127886_332_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,158 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((80 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + continue; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + 
WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 252 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 1792, 4353, 0, 1792, 4353, 0, 1792, 4353, 0, 4480, 20481, 0, 4480, 20481, 0, 4480, 20481, 0, 4484, 20481, 0, 4484, 20481, 0, 4484, 20481, 0, 4496, 20481, 0, 4496, 20481, 0, 4496, 20481, 0, 4500, 20481, 0, 4500, 20481, 0, 4500, 20481, 0, 4512, 20481, 0, 4512, 20481, 0, 4512, 20481, 0, 4516, 20481, 0, 4516, 20481, 0, 4516, 20481, 0, 6272, 1, 0, 6912, 85, 0, 6912, 85, 0, 6912, 85, 0, 6912, 85, 0, 7488, 21845, 0, 7488, 21845, 0, 7488, 21845, 0, 7488, 21845, 0, 7488, 21845, 0, 7488, 21845, 0, 7488, 21845, 0, 7488, 21845, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 1792, 4353, 0, 1792, 4353, 0, 1792, 4353, 0, 4480, 20481, 0, 4480, 20481, 0, 4480, 20481, 0, 4484, 20481, 0, 4484, 20481, 0, 4484, 20481, 0, 4496, 20481, 0, 4496, 20481, 0, 4496, 20481, 0, 4500, 20481, 0, 4500, 20481, 0, 4500, 20481, 0, 4512, 20481, 0, 4512, 20481, 0, 4512, 20481, 0, 4516, 20481, 0, 4516, 20481, 0, 4516, 20481, 0, 6272, 1, 0, 6912, 85, 0, 6912, 85, 0, 6912, 85, 0, 6912, 85, 0, 7488, 21845, 0, 7488, 21845, 0, 7488, 21845, 0, 7488, 21845, 0, 7488, 21845, 0, 7488, 21845, 0, 7488, 21845, 0, 7488, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 
3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576435473694893_334_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576435473694893_334_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..efda6a01 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576435473694893_334_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,120 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 32896, 0, 2112, 32896, 0, 6208, 16, 0, 6224, 16, 0, 6240, 16, 0, 2112, 32896, 0, 2112, 32896, 0, 6208, 16, 
0, 6224, 16, 0, 6240, 16, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576449942541046_338_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576449942541046_338_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5b7d3a8d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576449942541046_338_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,287 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + break; + } + } + break; + } + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((249 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((267 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((276 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((285 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7616, 8, 0, 11472, 2048, 0, 12944, 57347, 0, 12944, 57347, 0, 12944, 57347, 0, 12944, 57347, 0, 12944, 57347, 0, 12960, 57347, 0, 12960, 57347, 0, 12960, 57347, 0, 12960, 57347, 0, 12960, 57347, 0, 12976, 57347, 0, 12976, 57347, 0, 12976, 57347, 0, 12976, 57347, 0, 12976, 57347, 0, 15952, 260, 0, 15952, 260, 0, 15956, 260, 0, 15956, 260, 0, 15968, 260, 0, 15968, 260, 0, 15972, 260, 0, 15972, 260, 0, 15984, 260, 0, 15984, 260, 0, 15988, 260, 0, 15988, 260, 0, 17104, 4, 0, 17108, 4, 0, 17120, 4, 0, 17124, 4, 0, 17136, 4, 0, 17140, 4, 0, 17680, 256, 0, 17684, 256, 0, 17696, 256, 0, 17700, 256, 0, 17712, 256, 0, 17716, 256, 0, 7616, 8, 0, 11472, 2048, 0, 12944, 57347, 0, 12944, 57347, 0, 12944, 57347, 0, 12944, 57347, 0, 12944, 57347, 0, 12960, 57347, 0, 12960, 57347, 0, 12960, 57347, 0, 12960, 57347, 0, 12960, 57347, 0, 12976, 57347, 0, 
12976, 57347, 0, 12976, 57347, 0, 12976, 57347, 0, 12976, 57347, 0, 15952, 260, 0, 15952, 260, 0, 15956, 260, 0, 15956, 260, 0, 15968, 260, 0, 15968, 260, 0, 15972, 260, 0, 15972, 260, 0, 15984, 260, 0, 15984, 260, 0, 15988, 260, 0, 15988, 260, 0, 17104, 4, 0, 17108, 4, 0, 17120, 4, 0, 17124, 4, 0, 17136, 4, 0, 17140, 4, 0, 17680, 256, 0, 17684, 256, 0, 17696, 256, 0, 17700, 256, 0, 17712, 256, 0, 17716, 256, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576459461216719_341_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576459461216719_341_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..144f8734 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576459461216719_341_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,332 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 
<< 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 6))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((107 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 6))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((226 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((291 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + if ((i4 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 5)) { + result 
= (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((304 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (313 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 168 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 4096, 0, 1232, 4096, 0, 1248, 4096, 0, 5120, 1, 0, 5136, 1, 0, 5152, 1, 0, 7760, 8192, 0, 7776, 8192, 0, 9280, 17476, 0, 9280, 17476, 0, 9280, 17476, 0, 9280, 17476, 0, 9920, 8, 0, 15680, 16, 0, 15696, 16, 0, 15712, 16, 0, 18624, 16385, 0, 18624, 16385, 0, 18628, 16385, 0, 18628, 16385, 0, 18640, 16385, 0, 18640, 16385, 0, 18644, 16385, 0, 18644, 16385, 0, 18656, 16385, 0, 18656, 16385, 0, 18660, 16385, 0, 18660, 16385, 0, 1216, 4096, 0, 1232, 4096, 0, 1248, 4096, 0, 5120, 1, 0, 5136, 1, 0, 5152, 1, 0, 7760, 8192, 0, 7776, 8192, 0, 9280, 17476, 0, 9280, 17476, 0, 9280, 17476, 0, 9280, 17476, 0, 9920, 8, 0, 15680, 16, 0, 15696, 16, 0, 15712, 16, 0, 18624, 16385, 0, 18624, 16385, 0, 18628, 16385, 0, 18628, 16385, 0, 18640, 16385, 0, 18640, 16385, 0, 18644, 16385, 0, 18644, 16385, 0, 18656, 16385, 0, 18656, 16385, 0, 18660, 16385, 0, 18660, 16385, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize16BitTracking/tests/program_1756576468151092578_342_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576468151092578_342_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..e10135dc
--- /dev/null
+++ b/test/WaveSize16BitTracking/tests/program_1756576468151092578_342_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,215 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // output log: 3 uints per executed wave op (tag, ballot.x, ballot.y)
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the shared write cursor into _participant_bit
+
+[numthreads(32, 1, 1)] // NOTE(review): expected data lists every record set twice, presumably two 16-lane waves; nothing visible here pins the wave size - confirm
+void main(uint3 tid : SV_DispatchThreadID) { // every lane that reaches a wave op appends one 3-uint record
+  uint result = 0;
+  if ((WaveGetLaneIndex() == 4)) {
+    switch ((WaveGetLaneIndex() % 3)) {
+      case 0: {
+        if ((WaveGetLaneIndex() < 8)) {
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp); // reserve 3 slots; temp receives the cursor value before the add
+          _participant_bit[temp] = (12 << 6); // tag word: wave op ID stored from bit 6 upward
+          uint4 ballot = WaveActiveBallot(1); // bitmask of the lanes executing this wave op together
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 1: {
+        for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) {
+          if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) {
+            result = (result + WaveActiveMax(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((31 << 6) | (i0 << 4)); // inside a loop the tag also carries the loop counter at bit 4
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        break;
+      }
+      case 2: {
+        if (true) {
+          result = (result + WaveActiveSum(3));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (36 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      default: {
+        result = (result + WaveActiveSum(99));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (40 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+        break;
+      }
+    }
+    if ((WaveGetLaneIndex() == 8)) {
+      result = (result + WaveActiveMax(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (47 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  } else {
+    if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) {
+      result = (result + WaveActiveSum(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (58 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10))) {
+      switch ((WaveGetLaneIndex() % 3)) {
+        case 0: {
+          if ((WaveGetLaneIndex() < 8)) {
+            result = (result + WaveActiveSum(1));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (75 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 1: {
+          if (((WaveGetLaneIndex() % 2) == 0)) {
+            result = (result + WaveActiveSum(2));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (84 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 2: {
+          if (true) {
+            result = (result + WaveActiveSum(3));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (89 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+      }
+      if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) {
+        result = (result + WaveActiveMax(10));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (108 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+  }
+  switch ((WaveGetLaneIndex() % 3)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (118 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (127 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 2: {
+      if (true) {
+        result = (result + WaveActiveSum(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (132 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 32 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 108 # 108 = 36 records x 3 values, the same count as expected_bit_patterns below
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [3712, 49159, 0, 3712, 49159, 0, 3712, 49159, 0, 3712, 49159, 0, 3712, 49159, 0, 4800, 1, 0, 5376, 1024, 0, 6912, 1, 0, 7552, 73, 0, 7552, 73, 0, 7552, 73, 0, 8128, 1040, 0, 8128, 1040, 0, 8448, 18724, 0, 8448, 18724, 0, 8448, 18724, 0, 8448, 18724, 0, 8448, 18724, 0, 3712, 49159, 0, 3712, 49159, 0, 3712, 49159, 0, 3712, 49159, 0, 3712, 49159, 0, 4800, 1, 0, 5376, 1024, 0, 6912, 1, 0, 7552, 73, 0, 7552, 73, 0, 7552, 73, 0, 8128, 1040, 0, 8128, 1040, 0, 8448, 18724, 0, 8448, 18724, 0, 8448, 18724, 0, 8448, 18724, 0, 8448, 18724, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0] # write cursor starts at slot 0
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3 # one record = (tag, ballot.x, ballot.y)
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576468437465456_343_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576468437465456_343_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..88995a7b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576468437465456_343_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,716 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((39 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = 
(counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((92 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((99 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + 
break; + } + case 1: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((269 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((278 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 1)) { + break; + } + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((329 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((336 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i8 = 0; (i8 < 3); i8 = (i8 + 1)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((351 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i9 = 0; (i9 < 2); i9 = (i9 + 1)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((382 << 6) | (i9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + result = 
(result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((405 << 6) | (i9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (428 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (433 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (451 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter10 = 0; + while ((counter10 < 2)) { + counter10 = (counter10 + 1); + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((467 << 6) | (counter10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + if 
(((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((485 << 6) | (counter10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((500 << 6) | (counter10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((507 << 6) | (counter10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (518 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter11 = 0; + while ((counter11 < 3)) { + counter11 = (counter11 + 1); + for (uint i12 = 0; (i12 < 2); i12 = (i12 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((540 << 6) | (counter11 << 4)) | (i12 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((547 << 6) | (counter11 << 4)) | (i12 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i12 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((565 << 6) | (counter11 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (574 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (595 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (625 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (635 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (644 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (649 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (653 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (672 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (681 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter13 = 0; + while ((counter13 < 3)) { + counter13 = (counter13 + 1); + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((720 << 6) | (counter13 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((739 << 6) | (counter13 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((762 << 6) | (counter13 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((777 << 6) | (counter13 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (786 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter14 = 0; + while ((counter14 < 2)) { + counter14 = (counter14 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((814 << 6) | (counter14 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 0))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((848 << 6) | (counter14 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((871 << 6) | (counter14 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((894 << 6) | (counter14 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + uint counter15 = 0; + while ((counter15 < 2)) { + counter15 = (counter15 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((916 << 6) | (counter15 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i16 = 0; (i16 < 2); i16 = (i16 + 1)) { + if ((WaveGetLaneIndex() >= 14)) { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((936 << 6) | (counter15 << 4)) | (i16 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((957 << 6) | (counter15 << 4)) | (i16 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i16 == 1)) { + continue; + } + } + } + break; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 408 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2500, 1, 0, 2504, 1, 0, 2516, 1, 0, 2520, 1, 0, 2532, 1, 0, 2536, 1, 0, 3716, 4096, 0, 3720, 4096, 0, 3732, 4096, 0, 3736, 4096, 0, 3748, 4096, 0, 3752, 4096, 0, 4944, 16, 0, 6928, 16, 0, 11584, 256, 0, 19328, 32, 0, 21056, 2, 0, 21072, 2, 0, 27712, 17476, 0, 27712, 17476, 0, 27712, 17476, 0, 27712, 17476, 0, 28864, 32768, 0, 31056, 32768, 0, 31072, 32768, 0, 33152, 32768, 0, 36176, 2048, 0, 36192, 2048, 0, 36208, 2048, 0, 38080, 16385, 0, 38080, 16385, 0, 48784, 64, 0, 48800, 64, 0, 48816, 64, 0, 49744, 4160, 0, 49744, 4160, 0, 49760, 4160, 0, 49760, 4160, 0, 49776, 4160, 0, 49776, 4160, 0, 50304, 1040, 0, 50304, 1040, 0, 52112, 16640, 0, 52112, 16640, 0, 52128, 16640, 0, 52128, 16640, 0, 57232, 256, 0, 57248, 256, 0, 58640, 2560, 0, 58640, 2560, 0, 58656, 2560, 0, 58656, 2560, 0, 59920, 32768, 0, 59924, 32768, 0, 59936, 32768, 0, 59940, 32768, 0, 61264, 8322, 0, 61264, 8322, 0, 61264, 8322, 0, 61268, 8322, 0, 61268, 8322, 0, 61268, 8322, 0, 61280, 8322, 0, 61280, 8322, 0, 61280, 8322, 0, 61284, 8322, 0, 61284, 8322, 0, 61284, 8322, 0, 2500, 1, 0, 2504, 1, 0, 2516, 1, 0, 2520, 1, 0, 2532, 1, 0, 2536, 1, 0, 3716, 4096, 0, 3720, 4096, 0, 3732, 4096, 0, 3736, 4096, 0, 3748, 4096, 0, 3752, 4096, 0, 4944, 16, 0, 6928, 16, 0, 11584, 256, 0, 19328, 32, 0, 21056, 2, 0, 21072, 2, 0, 27712, 17476, 0, 27712, 17476, 0, 27712, 17476, 0, 27712, 17476, 0, 28864, 32768, 0, 31056, 32768, 0, 31072, 32768, 0, 33152, 32768, 0, 36176, 2048, 0, 36192, 2048, 0, 36208, 2048, 0, 38080, 16385, 0, 38080, 16385, 0, 48784, 64, 0, 48800, 64, 0, 48816, 64, 0, 49744, 4160, 0, 49744, 4160, 0, 49760, 4160, 0, 49760, 4160, 0, 49776, 4160, 0, 49776, 4160, 0, 50304, 1040, 0, 50304, 1040, 0, 52112, 16640, 0, 52112, 16640, 0, 
52128, 16640, 0, 52128, 16640, 0, 57232, 256, 0, 57248, 256, 0, 58640, 2560, 0, 58640, 2560, 0, 58656, 2560, 0, 58656, 2560, 0, 59920, 32768, 0, 59924, 32768, 0, 59936, 32768, 0, 59940, 32768, 0, 61264, 8322, 0, 61264, 8322, 0, 61264, 8322, 0, 61268, 8322, 0, 61268, 8322, 0, 61268, 8322, 0, 61280, 8322, 0, 61280, 8322, 0, 61280, 8322, 0, 61284, 8322, 0, 61284, 8322, 0, 61284, 8322, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576567873051577_344_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576567873051577_344_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f0932260 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576567873051577_344_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,315 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
} + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((139 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } 
+ case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + break; + } 
+ } + break; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 6))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((275 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((294 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((317 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 
threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 228 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 32777, 0, 1280, 32777, 0, 1280, 32777, 0, 1920, 1, 0, 3200, 32769, 0, 3200, 32769, 0, 6144, 18724, 0, 6144, 18724, 0, 6144, 18724, 0, 6144, 18724, 0, 6144, 18724, 0, 6784, 17, 0, 6784, 17, 0, 9408, 17476, 0, 9408, 17476, 0, 9408, 17476, 0, 9408, 17476, 0, 10048, 8, 0, 13504, 2048, 0, 13520, 2048, 0, 14080, 2048, 0, 14096, 2048, 0, 16192, 64, 0, 18832, 32896, 0, 18832, 32896, 0, 18836, 32896, 0, 18836, 32896, 0, 18848, 32896, 0, 18848, 32896, 0, 18852, 32896, 0, 18852, 32896, 0, 18864, 32896, 0, 18864, 32896, 0, 18868, 32896, 0, 18868, 32896, 0, 20304, 128, 0, 20320, 128, 0, 20336, 128, 0, 1280, 32777, 0, 1280, 32777, 0, 1280, 32777, 0, 1920, 1, 0, 3200, 32769, 0, 3200, 32769, 0, 6144, 18724, 0, 6144, 18724, 0, 6144, 18724, 0, 6144, 18724, 0, 6144, 18724, 0, 6784, 17, 0, 6784, 17, 0, 9408, 17476, 0, 9408, 17476, 0, 9408, 17476, 0, 9408, 17476, 0, 10048, 8, 0, 13504, 2048, 0, 13520, 2048, 0, 14080, 2048, 0, 14096, 2048, 0, 16192, 64, 0, 18832, 32896, 0, 18832, 32896, 0, 18836, 32896, 0, 18836, 32896, 0, 18848, 32896, 0, 18848, 32896, 0, 18852, 32896, 0, 18852, 32896, 0, 18864, 32896, 0, 18864, 32896, 0, 18868, 32896, 0, 18868, 32896, 0, 20304, 128, 0, 20320, 128, 0, 20336, 128, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576574154673133_345_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576574154673133_345_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..907035e9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576574154673133_345_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,101 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 4))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 8))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
(((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2576, 32, 0, 2592, 32, 0, 2608, 32, 0, 6224, 24, 0, 6224, 24, 0, 6240, 24, 0, 6240, 24, 0, 6256, 24, 0, 6256, 24, 0, 6672, 65280, 0, 6672, 65280, 0, 6672, 65280, 0, 6672, 65280, 0, 6672, 65280, 0, 6672, 65280, 0, 6672, 65280, 0, 6672, 65280, 0, 6688, 65280, 0, 6688, 65280, 0, 6688, 65280, 0, 6688, 65280, 0, 6688, 65280, 0, 6688, 65280, 0, 6688, 65280, 0, 6688, 65280, 0, 6704, 65280, 0, 6704, 65280, 0, 6704, 65280, 0, 6704, 65280, 0, 6704, 65280, 0, 6704, 65280, 0, 6704, 65280, 0, 6704, 65280, 0, 2576, 32, 0, 2592, 32, 0, 2608, 32, 0, 6224, 24, 0, 6224, 24, 0, 6240, 24, 0, 6240, 24, 0, 6256, 24, 0, 6256, 24, 0, 6672, 65280, 0, 6672, 65280, 0, 6672, 65280, 0, 6672, 65280, 0, 6672, 65280, 0, 6672, 65280, 0, 6672, 65280, 0, 6672, 65280, 0, 6688, 65280, 0, 6688, 65280, 0, 6688, 65280, 0, 6688, 65280, 0, 6688, 65280, 0, 6688, 65280, 0, 6688, 65280, 0, 6688, 65280, 0, 6704, 65280, 0, 6704, 65280, 0, 6704, 65280, 0, 6704, 65280, 0, 
6704, 65280, 0, 6704, 65280, 0, 6704, 65280, 0, 6704, 65280, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576641752930790_347_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576641752930790_347_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fa07b381 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576641752930790_347_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,229 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 8)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (i2 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1408, 256, 0, 1424, 256, 0, 1440, 256, 0, 2432, 256, 0, 2448, 256, 0, 2464, 256, 0, 5120, 61447, 0, 5120, 61447, 0, 5120, 61447, 0, 5120, 61447, 0, 5120, 61447, 0, 5120, 61447, 0, 5120, 61447, 0, 6848, 4, 0, 7552, 64515, 0, 7552, 64515, 0, 7552, 64515, 0, 7552, 64515, 0, 7552, 64515, 0, 7552, 64515, 0, 7552, 64515, 0, 7552, 64515, 0, 8192, 64, 0, 9360, 128, 0, 9376, 128, 0, 9392, 128, 0, 11072, 256, 0, 11088, 256, 0, 11904, 320, 0, 11904, 320, 0, 1408, 256, 0, 1424, 256, 0, 1440, 256, 0, 2432, 256, 0, 2448, 256, 0, 2464, 256, 0, 5120, 61447, 0, 5120, 61447, 0, 5120, 61447, 0, 5120, 61447, 0, 5120, 61447, 0, 5120, 61447, 0, 5120, 61447, 0, 6848, 4, 0, 7552, 64515, 0, 7552, 64515, 0, 7552, 64515, 0, 7552, 64515, 0, 7552, 64515, 0, 7552, 64515, 0, 7552, 64515, 0, 7552, 64515, 0, 8192, 64, 0, 9360, 128, 0, 9376, 128, 0, 9392, 128, 0, 11072, 256, 0, 11088, 256, 0, 11904, 320, 0, 11904, 320, 0] + - Name: _wave_op_index + Format: 
UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576642427850493_348_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576642427850493_348_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..79875fea --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576642427850493_348_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,207 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((153 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((160 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((167 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (i1 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 372 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6976, 2048, 0, 6992, 2048, 0, 8192, 57359, 0, 8192, 57359, 0, 8192, 57359, 0, 8192, 57359, 0, 8192, 57359, 0, 8192, 57359, 0, 8192, 57359, 0, 8208, 57359, 0, 8208, 57359, 0, 8208, 57359, 0, 8208, 57359, 0, 8208, 57359, 0, 8208, 57359, 0, 8208, 57359, 0, 9796, 49153, 0, 9796, 49153, 0, 9796, 49153, 0, 9800, 49153, 0, 9800, 49153, 0, 9800, 49153, 0, 9804, 49153, 0, 9804, 49153, 0, 9804, 49153, 0, 9812, 49153, 0, 9812, 49153, 0, 9812, 49153, 0, 9816, 49153, 0, 9816, 49153, 0, 9816, 49153, 0, 9820, 49153, 0, 9820, 49153, 0, 9820, 49153, 0, 10692, 64, 0, 10696, 64, 0, 10700, 64, 0, 10708, 64, 0, 10712, 64, 0, 10716, 64, 0, 11392, 64543, 0, 11392, 64543, 0, 11392, 64543, 0, 11392, 64543, 0, 11392, 64543, 0, 11392, 64543, 0, 11392, 64543, 0, 11392, 64543, 0, 11392, 64543, 0, 11392, 64543, 0, 11392, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 6976, 2048, 0, 6992, 2048, 0, 8192, 57359, 0, 8192, 57359, 0, 8192, 57359, 0, 8192, 57359, 0, 8192, 57359, 0, 8192, 57359, 0, 8192, 57359, 0, 8208, 57359, 0, 8208, 57359, 0, 8208, 57359, 0, 8208, 57359, 0, 8208, 57359, 0, 8208, 57359, 0, 8208, 57359, 0, 9796, 49153, 0, 9796, 49153, 0, 9796, 49153, 0, 9800, 49153, 0, 9800, 49153, 0, 9800, 49153, 0, 9804, 49153, 0, 9804, 49153, 0, 9804, 49153, 0, 9812, 49153, 0, 9812, 49153, 0, 9812, 49153, 0, 9816, 49153, 0, 9816, 49153, 0, 9816, 49153, 0, 9820, 49153, 0, 9820, 49153, 0, 9820, 49153, 0, 10692, 64, 0, 10696, 64, 0, 
10700, 64, 0, 10708, 64, 0, 10712, 64, 0, 10716, 64, 0, 11392, 64543, 0, 11392, 64543, 0, 11392, 64543, 0, 11392, 64543, 0, 11392, 64543, 0, 11392, 64543, 0, 11392, 64543, 0, 11392, 64543, 0, 11392, 64543, 0, 11392, 64543, 0, 11392, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 11408, 64543, 0, 11408, 64543, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576683182687609_350_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576683182687609_350_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2981d55a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576683182687609_350_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,443 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((122 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((140 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((149 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; 
+ } + case 1: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((284 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((293 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((302 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((325 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (332 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (336 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (347 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (356 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((373 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((391 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((423 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((434 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (443 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 708 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 1472, 8, 0, 3328, 2080, 0, 3328, 2080, 0, 4160, 43690, 0, 4160, 43690, 0, 4160, 43690, 0, 4160, 43690, 0, 4160, 43690, 0, 4160, 43690, 0, 4160, 43690, 0, 4160, 43690, 0, 5248, 4112, 0, 5248, 4112, 0, 8960, 16389, 0, 8960, 16389, 0, 8960, 16389, 0, 8964, 16389, 0, 8964, 16389, 0, 8964, 16389, 0, 8976, 16389, 0, 8976, 16389, 0, 8976, 16389, 0, 8980, 16389, 0, 8980, 16389, 0, 8980, 16389, 0, 9536, 5440, 0, 9536, 5440, 0, 9536, 5440, 0, 9536, 5440, 0, 9540, 5440, 0, 9540, 5440, 0, 9540, 5440, 0, 9540, 5440, 0, 9552, 5440, 0, 9552, 5440, 0, 9552, 5440, 0, 9552, 5440, 0, 9556, 5440, 0, 9556, 5440, 0, 9556, 5440, 0, 9556, 5440, 0, 11392, 57345, 0, 11392, 57345, 0, 11392, 57345, 0, 11392, 57345, 0, 12032, 1, 0, 14336, 8192, 0, 17408, 16384, 0, 17424, 16384, 0, 17440, 16384, 0, 19328, 16384, 0, 19344, 16384, 0, 19360, 16384, 0, 21248, 32768, 0, 22208, 57345, 0, 22208, 57345, 0, 22208, 57345, 0, 22208, 57345, 0, 22784, 5460, 0, 22784, 5460, 0, 22784, 5460, 0, 22784, 5460, 0, 22784, 5460, 0, 
22784, 5460, 0, 23872, 2730, 0, 23872, 2730, 0, 23872, 2730, 0, 23872, 2730, 0, 23872, 2730, 0, 23872, 2730, 0, 23888, 2730, 0, 23888, 2730, 0, 23888, 2730, 0, 23888, 2730, 0, 23888, 2730, 0, 23888, 2730, 0, 25028, 7182, 0, 25028, 7182, 0, 25028, 7182, 0, 25028, 7182, 0, 25028, 7182, 0, 25028, 7182, 0, 25032, 7182, 0, 25032, 7182, 0, 25032, 7182, 0, 25032, 7182, 0, 25032, 7182, 0, 25032, 7182, 0, 25044, 7182, 0, 25044, 7182, 0, 25044, 7182, 0, 25044, 7182, 0, 25044, 7182, 0, 25044, 7182, 0, 25048, 7182, 0, 25048, 7182, 0, 25048, 7182, 0, 25048, 7182, 0, 25048, 7182, 0, 25048, 7182, 0, 27076, 4, 0, 27080, 4, 0, 27092, 4, 0, 27096, 4, 0, 28352, 2730, 0, 28352, 2730, 0, 28352, 2730, 0, 28352, 2730, 0, 28352, 2730, 0, 28352, 2730, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 1472, 8, 0, 3328, 2080, 0, 3328, 2080, 0, 4160, 43690, 0, 4160, 43690, 0, 4160, 43690, 0, 4160, 43690, 0, 4160, 43690, 0, 4160, 43690, 0, 4160, 43690, 0, 4160, 43690, 0, 5248, 4112, 0, 5248, 4112, 0, 8960, 16389, 0, 8960, 16389, 0, 8960, 16389, 0, 8964, 16389, 0, 8964, 16389, 0, 8964, 16389, 0, 8976, 16389, 0, 8976, 16389, 0, 8976, 16389, 0, 8980, 16389, 0, 8980, 16389, 0, 8980, 16389, 0, 9536, 5440, 0, 9536, 5440, 0, 9536, 5440, 0, 9536, 5440, 0, 9540, 5440, 0, 9540, 5440, 0, 9540, 5440, 0, 9540, 5440, 0, 9552, 5440, 0, 9552, 5440, 0, 9552, 5440, 0, 9552, 5440, 0, 9556, 5440, 0, 9556, 5440, 0, 9556, 5440, 0, 9556, 5440, 0, 11392, 57345, 0, 11392, 57345, 0, 11392, 57345, 0, 11392, 57345, 0, 12032, 1, 0, 14336, 8192, 0, 17408, 16384, 0, 17424, 16384, 0, 17440, 16384, 0, 19328, 16384, 0, 19344, 16384, 0, 19360, 16384, 0, 21248, 32768, 0, 22208, 57345, 0, 22208, 57345, 0, 22208, 57345, 0, 22208, 57345, 0, 22784, 5460, 0, 22784, 5460, 0, 22784, 5460, 0, 22784, 5460, 0, 22784, 5460, 0, 22784, 5460, 0, 23872, 2730, 0, 23872, 2730, 0, 23872, 2730, 0, 23872, 2730, 0, 23872, 2730, 0, 23872, 2730, 0, 23888, 2730, 0, 23888, 2730, 0, 
23888, 2730, 0, 23888, 2730, 0, 23888, 2730, 0, 23888, 2730, 0, 25028, 7182, 0, 25028, 7182, 0, 25028, 7182, 0, 25028, 7182, 0, 25028, 7182, 0, 25028, 7182, 0, 25032, 7182, 0, 25032, 7182, 0, 25032, 7182, 0, 25032, 7182, 0, 25032, 7182, 0, 25032, 7182, 0, 25044, 7182, 0, 25044, 7182, 0, 25044, 7182, 0, 25044, 7182, 0, 25044, 7182, 0, 25044, 7182, 0, 25048, 7182, 0, 25048, 7182, 0, 25048, 7182, 0, 25048, 7182, 0, 25048, 7182, 0, 25048, 7182, 0, 27076, 4, 0, 27080, 4, 0, 27092, 4, 0, 27096, 4, 0, 28352, 2730, 0, 28352, 2730, 0, 28352, 2730, 0, 28352, 2730, 0, 28352, 2730, 0, 28352, 2730, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576694621567007_351_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576694621567007_351_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3de3c6ae --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576694621567007_351_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + 
Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1920, 512, 0, 1664, 63, 0, 1664, 63, 0, 1664, 63, 0, 1664, 63, 0, 1664, 63, 0, 1664, 63, 0, 1408, 16384, 0, 1920, 512, 0, 1664, 63, 0, 1664, 63, 0, 1664, 63, 0, 1664, 63, 0, 1664, 63, 0, 1664, 63, 0, 1408, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576713811953592_353_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576713811953592_353_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cbac999e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576713811953592_353_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,255 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 
1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((118 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((127 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((131 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((142 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3968, 4144, 0, 3968, 4144, 0, 3968, 4144, 0, 3584, 43658, 0, 3584, 43658, 0, 3584, 43658, 0, 3584, 43658, 0, 3584, 43658, 0, 3584, 43658, 0, 3584, 43658, 0, 3200, 17664, 0, 3200, 17664, 0, 3200, 17664, 0, 2944, 1, 0, 2688, 4, 0, 7552, 16, 0, 7556, 16, 0, 7568, 16, 0, 7572, 16, 0, 9792, 16, 0, 9808, 16, 0, 11072, 17, 0, 11072, 17, 0, 11968, 17476, 0, 11968, 17476, 0, 11968, 17476, 0, 11968, 17476, 0, 12416, 34952, 0, 12416, 34952, 0, 12416, 34952, 0, 12416, 34952, 0, 3968, 4144, 0, 3968, 4144, 0, 3968, 4144, 0, 3584, 43658, 0, 3584, 43658, 0, 3584, 43658, 0, 3584, 43658, 0, 3584, 43658, 0, 3584, 43658, 0, 3584, 43658, 0, 3200, 17664, 0, 3200, 17664, 0, 3200, 17664, 0, 2944, 1, 0, 2688, 4, 0, 7552, 16, 0, 7556, 16, 0, 7568, 16, 0, 7572, 16, 0, 9792, 16, 0, 9808, 16, 0, 11072, 17, 0, 11072, 17, 0, 11968, 17476, 0, 11968, 17476, 0, 11968, 17476, 0, 11968, 17476, 0, 12416, 34952, 0, 12416, 34952, 0, 12416, 34952, 0, 12416, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576714342684283_354_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576714342684283_354_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d3165960 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576714342684283_354_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,338 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + 
if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((170 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((204 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((227 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((250 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((273 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter2 == 1)) { + break; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (307 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 14)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (340 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (361 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (370 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (375 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (379 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 228 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3584, 26214, 0, 3584, 26214, 0, 3584, 26214, 0, 3584, 26214, 0, 3584, 26214, 0, 3584, 26214, 0, 3584, 26214, 0, 3584, 26214, 0, 4032, 34952, 0, 4032, 34952, 0, 4032, 34952, 0, 4032, 34952, 0, 7104, 2564, 0, 7104, 2564, 0, 7104, 2564, 0, 6720, 2, 0, 6336, 64, 0, 6080, 32769, 0, 6080, 32769, 0, 9232, 4356, 0, 9232, 4356, 0, 9232, 4356, 0, 9248, 4356, 0, 9248, 4356, 0, 9248, 4356, 0, 10900, 1028, 0, 10900, 1028, 0, 10916, 1028, 0, 10916, 1028, 0, 14548, 1, 0, 14564, 1, 0, 16020, 16384, 0, 16036, 16384, 0, 17492, 16384, 0, 17508, 16384, 0, 20288, 32768, 0, 24000, 2080, 0, 24000, 2080, 0, 3584, 26214, 0, 3584, 26214, 0, 3584, 26214, 0, 3584, 26214, 0, 3584, 26214, 0, 3584, 26214, 0, 3584, 26214, 0, 3584, 26214, 0, 4032, 34952, 0, 4032, 34952, 0, 4032, 34952, 0, 4032, 34952, 0, 7104, 2564, 0, 7104, 2564, 0, 7104, 2564, 0, 6720, 2, 0, 6336, 64, 0, 6080, 32769, 0, 6080, 32769, 0, 9232, 4356, 0, 9232, 4356, 0, 9232, 4356, 0, 9248, 4356, 0, 9248, 4356, 0, 9248, 4356, 0, 10900, 1028, 0, 10900, 1028, 0, 10916, 1028, 0, 10916, 1028, 0, 14548, 1, 0, 14564, 1, 0, 16020, 16384, 0, 16036, 16384, 0, 17492, 16384, 0, 17508, 16384, 0, 20288, 32768, 0, 24000, 2080, 0, 
24000, 2080, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576720098015627_355_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576720098015627_355_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ee6b2515 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576720098015627_355_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,281 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 15))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 11))) { + result = 
(result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + 
result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # 
Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 2944, 1, 0, 12800, 256, 0, 13696, 17476, 0, 13696, 17476, 0, 13696, 17476, 0, 13696, 17476, 0, 14144, 34952, 0, 14144, 34952, 0, 14144, 34952, 0, 14144, 34952, 0, 576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 2944, 1, 0, 12800, 256, 0, 13696, 17476, 0, 13696, 17476, 0, 13696, 17476, 0, 13696, 17476, 0, 14144, 34952, 0, 14144, 34952, 0, 14144, 34952, 0, 14144, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576720502382310_356_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576720502382310_356_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..88545fb5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576720502382310_356_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 
60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576720622138851_357_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576720622138851_357_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1b60d55f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576720622138851_357_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,289 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
+ case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 10)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + 
if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + 
result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 264 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 3152, 21504, 0, 3152, 21504, 0, 3152, 21504, 0, 3168, 21504, 0, 3168, 21504, 0, 3168, 21504, 0, 3728, 21504, 0, 3728, 21504, 0, 3728, 21504, 0, 3744, 21504, 0, 3744, 21504, 0, 3744, 21504, 0, 4560, 43008, 0, 4560, 43008, 0, 4560, 43008, 0, 4576, 43008, 0, 4576, 43008, 0, 4576, 43008, 0, 5904, 8, 0, 5920, 8, 0, 5936, 8, 0, 6592, 128, 0, 7232, 17, 0, 7232, 17, 0, 8128, 17476, 0, 8128, 17476, 0, 8128, 17476, 0, 8128, 17476, 0, 9024, 34952, 0, 9024, 34952, 0, 9024, 34952, 0, 9024, 34952, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 
0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 3152, 21504, 0, 3152, 21504, 0, 3152, 21504, 0, 3168, 21504, 0, 3168, 21504, 0, 3168, 21504, 0, 3728, 21504, 0, 3728, 21504, 0, 3728, 21504, 0, 3744, 21504, 0, 3744, 21504, 0, 3744, 21504, 0, 4560, 43008, 0, 4560, 43008, 0, 4560, 43008, 0, 4576, 43008, 0, 4576, 43008, 0, 4576, 43008, 0, 5904, 8, 0, 5920, 8, 0, 5936, 8, 0, 6592, 128, 0, 7232, 17, 0, 7232, 17, 0, 8128, 17476, 0, 8128, 17476, 0, 8128, 17476, 0, 8128, 17476, 0, 9024, 34952, 0, 9024, 34952, 0, 9024, 34952, 0, 9024, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576783021859824_361_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576783021859824_361_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f2653ad0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576783021859824_361_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,189 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 10)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 11)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() 
== 12)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1408, 36864, 0, 1408, 36864, 0, 4224, 4096, 0, 4240, 4096, 0, 4256, 4096, 0, 6912, 1, 0, 6928, 1, 0, 7616, 8, 0, 7632, 8, 0, 9088, 8, 0, 9104, 8, 0, 9664, 1040, 0, 9664, 1040, 0, 9984, 18724, 0, 9984, 18724, 0, 9984, 18724, 0, 9984, 18724, 0, 9984, 18724, 0, 1408, 36864, 0, 1408, 36864, 0, 4224, 4096, 0, 4240, 4096, 0, 4256, 4096, 0, 6912, 1, 0, 6928, 1, 0, 7616, 8, 0, 7632, 8, 0, 9088, 8, 0, 9104, 8, 0, 9664, 1040, 
0, 9664, 1040, 0, 9984, 18724, 0, 9984, 18724, 0, 9984, 18724, 0, 9984, 18724, 0, 9984, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576783518924650_362_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576783518924650_362_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..827ab48d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576783518924650_362_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,306 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13))) { + result = (result + 
WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 13)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4368, 4096, 0, 4816, 17, 0, 4816, 17, 0, 6144, 2, 0, 10752, 17476, 0, 10752, 17476, 0, 10752, 17476, 0, 10752, 17476, 0, 11904, 32768, 0, 13888, 2184, 0, 13888, 2184, 0, 13888, 2184, 0, 14528, 8, 0, 15424, 2048, 0, 4368, 4096, 0, 4816, 17, 0, 4816, 17, 0, 6144, 2, 0, 10752, 17476, 0, 10752, 17476, 0, 10752, 17476, 0, 10752, 17476, 0, 11904, 32768, 0, 13888, 2184, 0, 13888, 2184, 0, 13888, 2184, 0, 14528, 8, 0, 15424, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576784077047155_363_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576784077047155_363_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..49b3753c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576784077047155_363_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,195 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 
11))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: 
UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6016, 32768, 0, 6032, 32768, 0, 8640, 4, 0, 8656, 4, 0, 9728, 81, 0, 9728, 81, 0, 9728, 81, 0, 9744, 81, 0, 9744, 81, 0, 9744, 81, 0, 6016, 32768, 0, 6032, 32768, 0, 8640, 4, 0, 8656, 4, 0, 9728, 81, 0, 9728, 81, 0, 9728, 81, 0, 9744, 81, 0, 9744, 81, 0, 9744, 81, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576785247086723_364_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576785247086723_364_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..39ffd3d8 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576785247086723_364_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,121 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2112, 85, 0, 2112, 85, 0, 2112, 85, 0, 2112, 85, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 
1472, 18724, 0, 2112, 85, 0, 2112, 85, 0, 2112, 85, 0, 2112, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576787462200010_366_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576787462200010_366_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..69d0f905 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576787462200010_366_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,281 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((114 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ break; + } + case 1: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 57347, 0, 1088, 57347, 0, 1088, 57347, 0, 1088, 57347, 0, 1088, 57347, 0, 1728, 1, 0, 3200, 16384, 0, 3776, 16384, 0, 4224, 32768, 0, 5184, 3076, 0, 5184, 3076, 0, 5184, 3076, 0, 6144, 12, 0, 6144, 12, 0, 6160, 12, 0, 6160, 12, 0, 7300, 28, 0, 7300, 28, 0, 7300, 28, 0, 7304, 28, 0, 7304, 28, 0, 7304, 28, 0, 7316, 28, 0, 7316, 28, 0, 7316, 28, 0, 7320, 28, 0, 7320, 28, 0, 7320, 28, 0, 9088, 73, 0, 9088, 73, 0, 9088, 73, 0, 9664, 1040, 0, 9664, 1040, 0, 11088, 288, 0, 11088, 288, 0, 11104, 288, 0, 11104, 288, 0, 12688, 2048, 0, 12704, 2048, 0, 13312, 85, 0, 13312, 85, 0, 13312, 85, 0, 13312, 85, 0, 1088, 57347, 0, 1088, 57347, 0, 1088, 57347, 0, 1088, 57347, 0, 1088, 57347, 0, 1728, 1, 0, 3200, 16384, 0, 3776, 16384, 0, 4224, 32768, 0, 5184, 3076, 0, 5184, 3076, 0, 5184, 3076, 0, 6144, 12, 0, 6144, 12, 0, 6160, 12, 0, 6160, 12, 0, 7300, 28, 0, 7300, 28, 0, 7300, 28, 0, 7304, 28, 0, 7304, 28, 0, 7304, 28, 0, 7316, 28, 0, 7316, 28, 0, 7316, 28, 0, 7320, 28, 0, 7320, 28, 0, 7320, 28, 0, 9088, 73, 0, 9088, 73, 0, 9088, 73, 0, 9664, 1040, 0, 9664, 1040, 0, 11088, 288, 0, 11088, 288, 0, 11104, 288, 0, 11104, 288, 0, 
12688, 2048, 0, 12704, 2048, 0, 13312, 85, 0, 13312, 85, 0, 13312, 85, 0, 13312, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576802376018863_368_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576802376018863_368_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f9da34fb --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576802376018863_368_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,234 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() 
+ 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((212 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5136, 130, 0, 5136, 130, 0, 5152, 130, 0, 5152, 130, 0, 5168, 130, 0, 5168, 130, 0, 14144, 2080, 0, 14144, 2080, 0, 14160, 2080, 0, 14160, 2080, 0, 15360, 2048, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5136, 130, 0, 5136, 130, 0, 5152, 130, 0, 
5152, 130, 0, 5168, 130, 0, 5168, 130, 0, 14144, 2080, 0, 14144, 2080, 0, 14160, 2080, 0, 14160, 2080, 0, 15360, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576802777998759_369_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576802777998759_369_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2c2584b2 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576802777998759_369_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,235 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if 
((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((116 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((125 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((132 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((139 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 
0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((148 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((153 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((162 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter2 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 306 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 6160, 16384, 0, 9492, 5201, 0, 9492, 5201, 0, 9492, 5201, 0, 9492, 5201, 0, 9492, 5201, 0, 9496, 5201, 0, 9496, 5201, 0, 9496, 5201, 0, 9496, 5201, 0, 9496, 5201, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 
9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 10388, 8192, 0, 10392, 8192, 0, 576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 6160, 16384, 0, 9492, 5201, 0, 9492, 5201, 0, 9492, 5201, 0, 9492, 5201, 0, 9492, 5201, 0, 9496, 5201, 0, 9496, 5201, 0, 9496, 5201, 0, 9496, 5201, 0, 9496, 5201, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9812, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 9816, 65535, 0, 10388, 8192, 0, 10392, 8192, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize16BitTracking/tests/program_1756576806782479353_371_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576806782479353_371_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..39ffd3d8
--- /dev/null
+++ b/test/WaveSize16BitTracking/tests/program_1756576806782479353_371_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,121 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // output log of 3-word records: (op id << 6, ballot.x, ballot.y)
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the shared write cursor into _participant_bit, counted in words
+
+[numthreads(32, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // every lane that reaches a wave op appends one record naming the op and the lanes active with it
+    uint result = 0;
+    switch ((WaveGetLaneIndex() % 3)) {
+        case 0: {
+            if ((WaveGetLaneIndex() < 8)) { // lanes 0, 3 and 6 of each wave
+                result = (result + WaveActiveSum(1));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp); // reserve three words; temp receives the cursor value from before the add
+                _participant_bit[temp] = (9 << 6); // op id 9 in the bits above the low six
+                uint4 ballot = WaveActiveBallot(1); // constant-true ballot, i.e. the mask of lanes active at this point
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+        }
+        case 1: {
+            if (((WaveGetLaneIndex() % 2) == 0)) { // even lanes among those with index % 3 == 1
+                result = (result + WaveActiveSum(2));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (18 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+        }
+        case 2: {
+            if (true) { // every lane with index % 3 == 2 records op 23
+                result = (result + WaveActiveSum(3));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (23 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+        }
+    }
+    switch ((WaveGetLaneIndex() % 2)) {
+        case 0: {
+            if ((WaveGetLaneIndex() < 8)) { // lanes 0, 2, 4 and 6 of each wave
+                result = (result + WaveActiveSum(1));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (33 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+        }
+        case 1: {
+            if (((WaveGetLaneIndex() % 2) == 0)) { // never true here: this case only holds odd lanes, so op 42 is absent from the expected data
+                result = (result + WaveActiveSum(2));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (42 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+        }
+    }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 32 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 84
+  - Name: expected_bit_patterns # 14 records (3 + 2 + 5 + 4 for ops 9, 18, 23, 33) listed twice; presumably once per 16-lane wave - confirm
+    Format: UInt32
+    Stride: 4
+    Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2112, 85, 0, 2112, 85, 0, 2112, 85, 0, 2112, 85, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2112, 85, 0, 2112, 85, 0, 2112, 85, 0, 2112, 85, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3 # one record = three uint32 words
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize16BitTracking/tests/program_1756576806958980262_372_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576806958980262_372_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..44042187
--- /dev/null
+++ b/test/WaveSize16BitTracking/tests/program_1756576806958980262_372_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,96 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // output log of 3-word records: (op id << 6, ballot.x, ballot.y)
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the shared write cursor into _participant_bit, counted in words
+
+[numthreads(32, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // if / else-if ladder: each lane records at most one wave op
+    uint result = 0;
+    if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14))) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp); // reserve three words; temp receives the cursor value from before the add
+        _participant_bit[temp] = (43 << 6); // op id 43 in the bits above the low six
+        uint4 ballot = WaveActiveBallot(1); // constant-true ballot, i.e. the mask of lanes active at this point
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+    } else {
+        if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) {
+            result = (result + WaveActiveMin(2));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (39 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+        } else {
+            if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { // of the lanes still left: 0, 10, 12, 13 and 15 and above
+                result = (result + WaveActiveMax(3));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (35 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+            } else {
+                if ((WaveGetLaneIndex() >= 10)) { // never true here: lanes >= 10 all took the branch above, so op 31 is absent from the expected data
+                    result = (result + WaveActiveSum(4));
+                    uint temp = 0;
+                    InterlockedAdd(_wave_op_index[0], 3, temp);
+                    _participant_bit[temp] = (31 << 6);
+                    uint4 ballot = WaveActiveBallot(1);
+                    _participant_bit[(temp + 1)] = ballot.x;
+                    _participant_bit[(temp + 2)] = ballot.y;
+                }
+            }
+        }
+    }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 32 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 60
+  - Name: expected_bit_patterns # 10 records (2 + 3 + 5 for ops 43, 39, 35) listed twice; presumably once per 16-lane wave - confirm
+    Format: UInt32
+    Stride: 4
+    Data: [2752, 16388, 0, 2752, 16388, 0, 2496, 2306, 0, 2496, 2306, 0, 2496, 2306, 0, 2240, 46081, 0, 2240, 46081, 0, 2240, 46081, 0, 2240, 46081, 0, 2240, 46081, 0, 2752, 16388, 0, 2752, 16388, 0, 2496, 2306, 0, 2496, 2306, 0, 2496, 2306, 0, 2240, 46081, 0, 2240, 46081, 0, 2240, 46081, 0, 2240, 46081, 0, 2240, 46081, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3 # one record = three uint32 words
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576807104658779_373_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576807104658779_373_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e30370f3 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576807104658779_373_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,187 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(4)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((94 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((101 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 
3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((153 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3392, 16904, 0, 3392, 16904, 0, 3392, 16904, 0, 3136, 32775, 0, 3136, 32775, 0, 3136, 32775, 0, 3136, 32775, 0, 2752, 1024, 0, 2496, 12288, 0, 2496, 12288, 0, 5056, 4096, 0, 5072, 4096, 0, 8464, 4, 0, 8480, 4, 0, 3392, 16904, 0, 3392, 16904, 0, 3392, 16904, 0, 3136, 32775, 0, 3136, 32775, 0, 3136, 32775, 0, 3136, 32775, 0, 2752, 1024, 0, 2496, 12288, 0, 2496, 12288, 0, 5056, 4096, 0, 5072, 4096, 0, 8464, 4, 0, 8480, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576813471124424_374_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576813471124424_374_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b7c529df --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576813471124424_374_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,155 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 8))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5952, 16640, 0, 5952, 16640, 0, 5952, 16640, 0, 5952, 16640, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576813569083716_375_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576813569083716_375_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..699a9d25 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576813569083716_375_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,172 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + 
} + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((80 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((114 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((129 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((136 << 6) | (counter0 << 4)) | (i1 << 2)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((143 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((150 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 204 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1920, 16, 0, 2560, 16, 0, 3712, 43690, 0, 3712, 43690, 0, 3712, 43690, 0, 3712, 43690, 0, 3712, 43690, 0, 3712, 43690, 0, 3712, 43690, 0, 3712, 43690, 0, 5136, 32768, 0, 5140, 32768, 0, 5152, 32768, 0, 5156, 32768, 0, 5168, 32768, 0, 5172, 32768, 0, 7312, 64, 0, 7316, 64, 0, 7328, 64, 0, 7332, 64, 0, 7344, 64, 0, 7348, 64, 0, 8720, 32, 0, 8724, 32, 0, 8736, 32, 0, 8740, 32, 0, 8752, 32, 0, 8756, 32, 0, 9616, 32768, 0, 9620, 32768, 0, 9632, 32768, 0, 9636, 32768, 0, 9648, 32768, 0, 9652, 32768, 0, 1920, 16, 0, 2560, 16, 0, 3712, 43690, 0, 3712, 43690, 0, 3712, 43690, 0, 3712, 43690, 0, 3712, 43690, 0, 3712, 43690, 0, 3712, 43690, 0, 3712, 43690, 0, 5136, 32768, 0, 5140, 32768, 0, 5152, 32768, 0, 5156, 32768, 0, 5168, 32768, 0, 5172, 32768, 0, 7312, 64, 0, 7316, 64, 0, 7328, 64, 0, 7332, 64, 0, 7344, 64, 0, 7348, 64, 0, 
8720, 32, 0, 8724, 32, 0, 8736, 32, 0, 8740, 32, 0, 8752, 32, 0, 8756, 32, 0, 9616, 32768, 0, 9620, 32768, 0, 9632, 32768, 0, 9636, 32768, 0, 9648, 32768, 0, 9652, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576823173168502_376_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576823173168502_376_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..edf22c22 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576823173168502_376_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,158 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 13)) { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 594 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 992, 21845, 0, 992, 21845, 0, 992, 21845, 0, 992, 21845, 0, 992, 21845, 0, 992, 21845, 0, 992, 21845, 0, 992, 21845, 0, 1008, 21845, 0, 1008, 21845, 0, 1008, 21845, 0, 1008, 21845, 0, 1008, 21845, 0, 1008, 21845, 0, 1008, 21845, 0, 1008, 21845, 0, 1616, 57344, 0, 1616, 57344, 0, 1616, 57344, 0, 1632, 57344, 0, 1632, 57344, 0, 1632, 57344, 0, 1648, 57344, 0, 1648, 57344, 0, 1648, 57344, 0, 3600, 57344, 0, 3600, 57344, 0, 3600, 57344, 0, 3616, 57344, 0, 3616, 57344, 0, 3616, 57344, 0, 3632, 57344, 0, 3632, 57344, 0, 3632, 
57344, 0, 4048, 6144, 0, 4048, 6144, 0, 4064, 6144, 0, 4064, 6144, 0, 4080, 6144, 0, 4080, 6144, 0, 5264, 1024, 0, 5280, 1024, 0, 5296, 1024, 0, 5712, 255, 0, 5712, 255, 0, 5712, 255, 0, 5712, 255, 0, 5712, 255, 0, 5712, 255, 0, 5712, 255, 0, 5712, 255, 0, 5728, 255, 0, 5728, 255, 0, 5728, 255, 0, 5728, 255, 0, 5728, 255, 0, 5728, 255, 0, 5728, 255, 0, 5728, 255, 0, 5744, 255, 0, 5744, 255, 0, 5744, 255, 0, 5744, 255, 0, 5744, 255, 0, 5744, 255, 0, 5744, 255, 0, 5744, 255, 0, 6288, 21845, 0, 6288, 21845, 0, 6288, 21845, 0, 6288, 21845, 0, 6288, 21845, 0, 6288, 21845, 0, 6288, 21845, 0, 6288, 21845, 0, 6304, 21845, 0, 6304, 21845, 0, 6304, 21845, 0, 6304, 21845, 0, 6304, 21845, 0, 6304, 21845, 0, 6304, 21845, 0, 6304, 21845, 0, 6320, 21845, 0, 6320, 21845, 0, 6320, 21845, 0, 6320, 21845, 0, 6320, 21845, 0, 6320, 21845, 0, 6320, 21845, 0, 6320, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 992, 21845, 0, 992, 21845, 0, 992, 21845, 0, 992, 21845, 0, 992, 21845, 0, 992, 21845, 0, 992, 21845, 0, 992, 21845, 0, 1008, 21845, 0, 1008, 21845, 0, 1008, 21845, 0, 1008, 21845, 0, 1008, 21845, 0, 1008, 21845, 0, 1008, 21845, 0, 1008, 21845, 0, 1616, 57344, 0, 1616, 57344, 0, 1616, 57344, 0, 1632, 57344, 0, 1632, 57344, 0, 1632, 57344, 0, 1648, 57344, 0, 1648, 57344, 0, 1648, 57344, 0, 3600, 57344, 0, 3600, 57344, 0, 3600, 57344, 0, 3616, 57344, 0, 3616, 57344, 0, 3616, 57344, 0, 3632, 57344, 0, 3632, 57344, 0, 3632, 57344, 0, 4048, 6144, 0, 4048, 6144, 0, 4064, 6144, 0, 4064, 6144, 0, 4080, 6144, 0, 4080, 6144, 0, 5264, 1024, 0, 5280, 1024, 0, 5296, 1024, 0, 5712, 255, 0, 5712, 255, 0, 5712, 255, 0, 5712, 255, 0, 5712, 255, 0, 5712, 255, 0, 5712, 255, 0, 5712, 255, 0, 5728, 255, 0, 5728, 255, 0, 5728, 255, 0, 5728, 255, 0, 5728, 255, 0, 5728, 255, 0, 5728, 255, 0, 5728, 255, 0, 5744, 255, 0, 5744, 255, 0, 5744, 255, 0, 5744, 255, 0, 5744, 255, 0, 5744, 255, 0, 5744, 255, 0, 5744, 255, 0, 6288, 
21845, 0, 6288, 21845, 0, 6288, 21845, 0, 6288, 21845, 0, 6288, 21845, 0, 6288, 21845, 0, 6288, 21845, 0, 6288, 21845, 0, 6304, 21845, 0, 6304, 21845, 0, 6304, 21845, 0, 6304, 21845, 0, 6304, 21845, 0, 6304, 21845, 0, 6304, 21845, 0, 6304, 21845, 0, 6320, 21845, 0, 6320, 21845, 0, 6320, 21845, 0, 6320, 21845, 0, 6320, 21845, 0, 6320, 21845, 0, 6320, 21845, 0, 6320, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576830086475540_377_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576830086475540_377_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ad8c0164 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576830086475540_377_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,455 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 9)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((231 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || 
(WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (329 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (338 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (342 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (349 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (356 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((382 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((397 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (404 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = 
(result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (411 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1728, 8192, 0, 4544, 512, 0, 7488, 1028, 0, 7488, 1028, 0, 7504, 1028, 0, 7504, 1028, 0, 7520, 1028, 0, 7520, 1028, 0, 8576, 34952, 0, 8576, 34952, 0, 8576, 34952, 0, 8576, 34952, 0, 19776, 8736, 0, 19776, 8736, 0, 19776, 8736, 0, 25856, 34952, 0, 25856, 34952, 0, 25856, 34952, 0, 25856, 34952, 0, 26304, 40960, 0, 26304, 40960, 0, 576, 17, 0, 576, 17, 0, 1728, 8192, 0, 4544, 512, 0, 7488, 1028, 0, 7488, 1028, 0, 7504, 1028, 0, 7504, 1028, 0, 7520, 1028, 0, 7520, 1028, 0, 8576, 34952, 0, 8576, 34952, 0, 8576, 34952, 0, 8576, 34952, 0, 19776, 8736, 0, 19776, 8736, 0, 19776, 8736, 0, 25856, 34952, 0, 25856, 34952, 0, 25856, 34952, 0, 25856, 34952, 0, 26304, 40960, 0, 26304, 40960, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576852206536245_379_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576852206536245_379_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f376ddcc --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576852206536245_379_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,137 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((89 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 32769, 0, 1344, 32769, 0, 1360, 32769, 0, 1360, 32769, 0, 1376, 32769, 0, 1376, 32769, 0, 2112, 1040, 0, 2112, 1040, 0, 2432, 18724, 0, 2432, 18724, 0, 2432, 18724, 0, 2432, 18724, 0, 2432, 18724, 0, 4672, 41601, 0, 4672, 41601, 0, 4672, 41601, 0, 4672, 41601, 0, 4672, 41601, 0, 4688, 41601, 0, 4688, 41601, 0, 4688, 41601, 0, 4688, 41601, 0, 4688, 41601, 0, 4704, 41601, 0, 4704, 41601, 0, 4704, 41601, 0, 4704, 41601, 0, 4704, 41601, 0, 5700, 57344, 0, 5700, 57344, 0, 5700, 57344, 0, 5716, 
57344, 0, 5716, 57344, 0, 5716, 57344, 0, 5732, 57344, 0, 5732, 57344, 0, 5732, 57344, 0, 1344, 32769, 0, 1344, 32769, 0, 1360, 32769, 0, 1360, 32769, 0, 1376, 32769, 0, 1376, 32769, 0, 2112, 1040, 0, 2112, 1040, 0, 2432, 18724, 0, 2432, 18724, 0, 2432, 18724, 0, 2432, 18724, 0, 2432, 18724, 0, 4672, 41601, 0, 4672, 41601, 0, 4672, 41601, 0, 4672, 41601, 0, 4672, 41601, 0, 4688, 41601, 0, 4688, 41601, 0, 4688, 41601, 0, 4688, 41601, 0, 4688, 41601, 0, 4704, 41601, 0, 4704, 41601, 0, 4704, 41601, 0, 4704, 41601, 0, 4704, 41601, 0, 5700, 57344, 0, 5700, 57344, 0, 5700, 57344, 0, 5716, 57344, 0, 5716, 57344, 0, 5716, 57344, 0, 5732, 57344, 0, 5732, 57344, 0, 5732, 57344, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576856384740977_380_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576856384740977_380_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c66cf7e0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576856384740977_380_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,210 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 6912, 18724, 0, 6912, 18724, 0, 6912, 18724, 0, 6912, 18724, 0, 6912, 18724, 0, 7808, 85, 0, 7808, 85, 0, 7808, 85, 0, 7808, 85, 0, 8384, 21845, 0, 8384, 21845, 0, 8384, 21845, 0, 8384, 21845, 0, 8384, 21845, 0, 8384, 21845, 0, 8384, 21845, 0, 8384, 21845, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 6912, 18724, 0, 6912, 18724, 0, 6912, 18724, 0, 6912, 18724, 0, 6912, 18724, 0, 7808, 85, 0, 7808, 85, 0, 7808, 85, 0, 7808, 85, 0, 8384, 21845, 0, 8384, 21845, 0, 8384, 21845, 0, 8384, 
21845, 0, 8384, 21845, 0, 8384, 21845, 0, 8384, 21845, 0, 8384, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756576856627927944_381_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756576856627927944_381_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c0f1ef2c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756576856627927944_381_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,497 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() 
+ 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i2 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((128 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 1)) { + continue; + } + if ((i6 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (294 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (304 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (313 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((343 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 3)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((361 << 6) | 
(i7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((372 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 12)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (387 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (406 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (421 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (430 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (435 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 318 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [9728, 8322, 0, 9728, 8322, 0, 9728, 8322, 0, 10768, 2, 0, 10784, 2, 0, 11392, 18724, 0, 11392, 18724, 0, 11392, 18724, 0, 11392, 18724, 0, 11392, 18724, 0, 12288, 17, 0, 12288, 17, 0, 16192, 2, 0, 16208, 2, 0, 16224, 2, 0, 17472, 17476, 0, 17472, 17476, 0, 17472, 17476, 0, 17472, 17476, 0, 17920, 34952, 0, 17920, 34952, 0, 17920, 34952, 0, 17920, 34952, 0, 18816, 17, 0, 18816, 17, 0, 20352, 32, 0, 21952, 1024, 0, 21968, 1024, 0, 21984, 1024, 0, 23108, 16388, 0, 23108, 16388, 0, 23112, 16388, 0, 23112, 16388, 0, 23116, 16388, 0, 23116, 16388, 0, 23124, 16388, 0, 23124, 16388, 0, 23128, 16388, 0, 23128, 16388, 0, 23132, 16388, 0, 23132, 16388, 0, 23140, 16388, 0, 23140, 16388, 0, 23144, 16388, 0, 23144, 16388, 0, 23148, 16388, 0, 23148, 16388, 0, 23808, 1024, 0, 23824, 1024, 0, 23840, 1024, 0, 24768, 32768, 0, 25984, 8, 0, 27840, 2048, 0, 9728, 8322, 0, 9728, 8322, 0, 9728, 8322, 0, 10768, 2, 0, 10784, 2, 0, 11392, 18724, 0, 11392, 18724, 0, 11392, 18724, 0, 11392, 18724, 0, 11392, 18724, 0, 12288, 17, 0, 12288, 17, 0, 16192, 2, 0, 16208, 2, 0, 16224, 2, 0, 17472, 17476, 0, 17472, 17476, 0, 17472, 17476, 0, 17472, 17476, 0, 17920, 34952, 0, 17920, 34952, 0, 17920, 34952, 0, 17920, 34952, 0, 18816, 17, 0, 18816, 17, 0, 20352, 32, 
0, 21952, 1024, 0, 21968, 1024, 0, 21984, 1024, 0, 23108, 16388, 0, 23108, 16388, 0, 23112, 16388, 0, 23112, 16388, 0, 23116, 16388, 0, 23116, 16388, 0, 23124, 16388, 0, 23124, 16388, 0, 23128, 16388, 0, 23128, 16388, 0, 23132, 16388, 0, 23132, 16388, 0, 23140, 16388, 0, 23140, 16388, 0, 23144, 16388, 0, 23144, 16388, 0, 23148, 16388, 0, 23148, 16388, 0, 23808, 1024, 0, 23824, 1024, 0, 23840, 1024, 0, 24768, 32768, 0, 25984, 8, 0, 27840, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577225941134596_384_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577225941134596_384_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..41397f8a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577225941134596_384_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,198 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; 
+ while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((110 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 14)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((120 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((127 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((136 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((145 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 312 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 36864, 0, 1040, 36864, 0, 7060, 1040, 0, 7060, 1040, 0, 7064, 1040, 0, 7064, 1040, 0, 7076, 1040, 0, 7076, 1040, 0, 7080, 1040, 0, 7080, 1040, 0, 7092, 1040, 0, 7092, 1040, 0, 7096, 1040, 0, 7096, 1040, 0, 8724, 8322, 0, 8724, 8322, 0, 8724, 8322, 0, 8728, 8322, 0, 8728, 8322, 0, 8728, 8322, 0, 8740, 8322, 0, 8740, 8322, 0, 8740, 8322, 0, 8744, 8322, 0, 8744, 8322, 0, 8744, 8322, 0, 8756, 8322, 0, 8756, 8322, 0, 8756, 8322, 0, 8760, 8322, 0, 8760, 8322, 0, 8760, 8322, 0, 9300, 1040, 0, 9300, 1040, 0, 9304, 1040, 0, 9304, 1040, 0, 9316, 1040, 0, 9316, 1040, 0, 9320, 1040, 0, 9320, 1040, 0, 9332, 1040, 0, 9332, 1040, 0, 9336, 1040, 0, 9336, 1040, 0, 9872, 2, 0, 9888, 2, 0, 9904, 2, 0, 10176, 18724, 0, 10176, 18724, 0, 10176, 18724, 0, 10176, 18724, 0, 10176, 18724, 0, 1040, 36864, 0, 1040, 36864, 0, 7060, 1040, 0, 7060, 1040, 0, 7064, 1040, 0, 7064, 1040, 0, 7076, 1040, 0, 7076, 1040, 0, 7080, 1040, 0, 7080, 1040, 0, 7092, 1040, 0, 7092, 1040, 0, 7096, 1040, 0, 7096, 1040, 0, 8724, 8322, 0, 8724, 8322, 0, 8724, 8322, 0, 8728, 8322, 0, 8728, 8322, 0, 8728, 8322, 0, 8740, 8322, 0, 8740, 8322, 0, 8740, 8322, 0, 8744, 8322, 0, 
8744, 8322, 0, 8744, 8322, 0, 8756, 8322, 0, 8756, 8322, 0, 8756, 8322, 0, 8760, 8322, 0, 8760, 8322, 0, 8760, 8322, 0, 9300, 1040, 0, 9300, 1040, 0, 9304, 1040, 0, 9304, 1040, 0, 9316, 1040, 0, 9316, 1040, 0, 9320, 1040, 0, 9320, 1040, 0, 9332, 1040, 0, 9332, 1040, 0, 9336, 1040, 0, 9336, 1040, 0, 9872, 2, 0, 9888, 2, 0, 9904, 2, 0, 10176, 18724, 0, 10176, 18724, 0, 10176, 18724, 0, 10176, 18724, 0, 10176, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577228222189549_385_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577228222189549_385_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..25ce0c95 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577228222189549_385_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,185 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; 
+ } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 2))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() 
== 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1536, 64, 0, 1552, 64, 0, 1568, 64, 0, 2692, 4098, 0, 2692, 4098, 0, 2696, 4098, 0, 2696, 4098, 0, 2700, 4098, 0, 2700, 4098, 0, 2708, 4098, 0, 2708, 4098, 0, 2712, 4098, 0, 2712, 4098, 0, 2716, 4098, 0, 2716, 4098, 0, 2724, 4098, 0, 2724, 4098, 0, 2728, 4098, 0, 2728, 4098, 0, 2732, 4098, 0, 2732, 4098, 0, 3652, 512, 0, 3656, 512, 0, 3660, 512, 0, 3668, 512, 0, 3672, 512, 0, 3676, 512, 0, 3684, 512, 0, 3688, 512, 0, 3692, 512, 0, 4224, 2, 0, 4240, 2, 0, 4256, 2, 0, 4736, 18724, 0, 4736, 18724, 0, 4736, 18724, 0, 4736, 18724, 0, 4736, 18724, 0, 11008, 16384, 0, 11024, 16384, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1536, 64, 0, 1552, 64, 0, 1568, 64, 0, 2692, 4098, 0, 2692, 4098, 0, 2696, 4098, 0, 2696, 4098, 0, 2700, 4098, 0, 2700, 4098, 0, 2708, 4098, 0, 2708, 4098, 0, 2712, 4098, 0, 2712, 4098, 0, 2716, 4098, 0, 2716, 4098, 0, 2724, 4098, 0, 2724, 4098, 0, 2728, 4098, 0, 2728, 4098, 0, 2732, 4098, 0, 2732, 4098, 0, 3652, 512, 0, 3656, 512, 0, 3660, 512, 0, 3668, 512, 0, 3672, 512, 0, 3676, 512, 0, 3684, 512, 0, 3688, 512, 0, 3692, 512, 0, 4224, 2, 0, 4240, 2, 0, 4256, 2, 0, 4736, 18724, 0, 4736, 18724, 0, 4736, 18724, 0, 4736, 18724, 0, 4736, 18724, 0, 11008, 16384, 0, 11024, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + 
Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577233786355711_386_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577233786355711_386_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b7de78d1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577233786355711_386_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,164 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 14)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 9)) { + 
result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4480, 2, 0, 4496, 2, 0, 4512, 2, 0, 4928, 2, 0, 4944, 2, 0, 4960, 2, 0, 5248, 17476, 0, 5248, 17476, 0, 5248, 17476, 0, 5248, 17476, 0, 5696, 34952, 0, 5696, 34952, 0, 5696, 34952, 0, 5696, 34952, 0, 4480, 2, 0, 4496, 2, 0, 4512, 2, 0, 4928, 2, 0, 4944, 2, 0, 4960, 2, 0, 5248, 17476, 0, 5248, 17476, 0, 5248, 17476, 0, 5248, 17476, 0, 5696, 34952, 0, 5696, 34952, 0, 5696, 34952, 0, 5696, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577238969868145_388_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577238969868145_388_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a93fb9ae --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577238969868145_388_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,130 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 6))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((71 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 3520, 8192, 0, 3536, 8192, 0, 3552, 8192, 0, 4548, 8192, 0, 4552, 8192, 0, 4556, 8192, 0, 4564, 8192, 0, 4568, 8192, 0, 4572, 8192, 0, 4580, 8192, 0, 4584, 8192, 0, 4588, 8192, 0, 6528, 18724, 0, 6528, 18724, 0, 6528, 18724, 0, 6528, 18724, 0, 6528, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 3520, 8192, 0, 3536, 8192, 0, 3552, 8192, 0, 4548, 8192, 0, 4552, 8192, 0, 4556, 8192, 0, 
4564, 8192, 0, 4568, 8192, 0, 4572, 8192, 0, 4580, 8192, 0, 4584, 8192, 0, 4588, 8192, 0, 6528, 18724, 0, 6528, 18724, 0, 6528, 18724, 0, 6528, 18724, 0, 6528, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577243600931508_390_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577243600931508_390_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..99d17f54 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577243600931508_390_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,226 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((157 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 174 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 57345, 0, 1216, 57345, 0, 1216, 57345, 0, 1216, 57345, 0, 1856, 1, 0, 2752, 18432, 0, 2752, 18432, 0, 3968, 80, 0, 3968, 80, 0, 6400, 1792, 0, 6400, 1792, 0, 6400, 1792, 0, 6416, 1792, 0, 6416, 1792, 0, 6416, 1792, 0, 7040, 17, 0, 7040, 17, 0, 7936, 17476, 0, 7936, 17476, 0, 7936, 17476, 0, 7936, 17476, 0, 8896, 32768, 0, 8912, 32768, 0, 10052, 32768, 0, 10056, 32768, 0, 10060, 32768, 0, 10068, 32768, 0, 10072, 32768, 0, 10076, 32768, 0, 1216, 57345, 0, 1216, 57345, 0, 1216, 57345, 0, 1216, 57345, 0, 1856, 1, 0, 2752, 18432, 0, 2752, 18432, 0, 3968, 80, 0, 3968, 80, 0, 6400, 1792, 0, 6400, 1792, 0, 6400, 1792, 0, 6416, 1792, 0, 6416, 1792, 0, 6416, 1792, 0, 7040, 17, 0, 7040, 17, 0, 7936, 17476, 0, 7936, 17476, 0, 7936, 17476, 0, 7936, 17476, 0, 8896, 32768, 0, 8912, 32768, 0, 10052, 32768, 0, 10056, 32768, 0, 10060, 32768, 0, 10068, 32768, 0, 10072, 32768, 0, 10076, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - 
Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577245265303780_391_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577245265303780_391_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..101bd5a8 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577245265303780_391_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,260 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((186 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((196 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((205 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((210 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((214 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((237 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # 
Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 342 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2496, 32768, 0, 3200, 33288, 0, 3200, 33288, 0, 3200, 33288, 0, 3904, 1, 0, 5200, 4097, 0, 5200, 4097, 0, 5216, 4097, 0, 5216, 4097, 0, 5904, 1, 0, 5920, 1, 0, 13124, 1040, 0, 13124, 1040, 0, 13128, 1040, 0, 13128, 1040, 0, 13132, 1040, 0, 13132, 1040, 0, 13140, 1040, 0, 13140, 1040, 0, 13144, 1040, 0, 13144, 1040, 0, 13148, 1040, 0, 13148, 1040, 0, 13156, 1040, 0, 13156, 1040, 0, 13160, 1040, 0, 13160, 1040, 0, 13164, 1040, 0, 13164, 1040, 0, 15172, 18, 0, 15172, 18, 0, 15176, 18, 0, 15176, 18, 0, 15180, 18, 0, 15180, 18, 0, 15188, 18, 0, 15188, 18, 0, 15192, 18, 0, 15192, 18, 0, 15196, 18, 0, 15196, 18, 0, 15204, 18, 0, 15204, 18, 0, 15208, 18, 0, 15208, 18, 0, 15212, 18, 0, 15212, 18, 0, 15488, 28086, 0, 15488, 28086, 0, 15488, 28086, 0, 15488, 28086, 0, 15488, 28086, 0, 15488, 28086, 0, 15488, 28086, 0, 15488, 28086, 0, 15488, 28086, 0, 15488, 28086, 0, 2496, 32768, 0, 3200, 33288, 0, 3200, 33288, 0, 3200, 33288, 0, 3904, 1, 0, 5200, 4097, 0, 5200, 4097, 0, 5216, 4097, 0, 5216, 4097, 0, 5904, 1, 0, 5920, 1, 0, 13124, 1040, 0, 13124, 1040, 0, 13128, 1040, 0, 13128, 1040, 0, 13132, 1040, 0, 13132, 1040, 0, 13140, 1040, 0, 13140, 1040, 0, 13144, 1040, 0, 13144, 1040, 0, 13148, 1040, 0, 13148, 1040, 0, 13156, 1040, 0, 13156, 1040, 0, 13160, 1040, 0, 13160, 1040, 0, 13164, 1040, 0, 13164, 1040, 0, 15172, 18, 0, 15172, 18, 0, 15176, 18, 0, 15176, 18, 0, 15180, 18, 0, 15180, 18, 0, 15188, 18, 0, 15188, 18, 0, 15192, 18, 0, 15192, 18, 0, 15196, 18, 0, 15196, 18, 0, 15204, 18, 0, 15204, 18, 0, 15208, 18, 0, 15208, 18, 0, 15212, 18, 0, 15212, 18, 0, 15488, 28086, 0, 15488, 28086, 0, 15488, 28086, 0, 15488, 28086, 0, 15488, 28086, 0, 15488, 28086, 0, 15488, 28086, 0, 15488, 28086, 0, 15488, 28086, 0, 15488, 28086, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] 
+Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577252263416441_392_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577252263416441_392_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..501210ad --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577252263416441_392_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,109 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 5))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 5184, 16, 0, 5200, 16, 0, 5216, 16, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 5184, 16, 0, 5200, 16, 0, 5216, 16, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + 
Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577253317207976_394_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577253317207976_394_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..34284825 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577253317207976_394_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,372 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((85 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || 
(WaveGetLaneIndex() == 13))) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((179 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((230 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 3) 
|| (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((260 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((275 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((284 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((293 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((341 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (350 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (355 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (360 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 0))) { + if ((WaveGetLaneIndex() == 0)) { + switch ((WaveGetLaneIndex() % 2)) 
{ + case 0: { + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((392 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 1)) { + break; + } + } + break; + } + case 1: { + for (uint i8 = 0; (i8 < 2); i8 = (i8 + 1)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((410 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((419 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (426 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (449 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute 
+ Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 354 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5440, 1, 0, 5444, 1, 0, 5456, 1, 0, 5460, 1, 0, 6784, 272, 0, 6784, 272, 0, 6800, 272, 0, 6800, 272, 0, 7808, 17, 0, 7808, 17, 0, 14724, 17408, 0, 14724, 17408, 0, 14728, 17408, 0, 14728, 17408, 0, 14732, 17408, 0, 14732, 17408, 0, 14740, 17408, 0, 14740, 17408, 0, 14744, 17408, 0, 14744, 17408, 0, 14748, 17408, 0, 14748, 17408, 0, 14756, 17408, 0, 14756, 17408, 0, 14760, 17408, 0, 14760, 17408, 0, 14764, 17408, 0, 14764, 17408, 0, 18180, 1092, 0, 18180, 1092, 0, 18180, 1092, 0, 18184, 1092, 0, 18184, 1092, 0, 18184, 1092, 0, 18188, 1092, 0, 18188, 1092, 0, 18188, 1092, 0, 18196, 1092, 0, 18196, 1092, 0, 18196, 1092, 0, 18200, 1092, 0, 18200, 1092, 0, 18200, 1092, 0, 18204, 1092, 0, 18204, 1092, 0, 18204, 1092, 0, 18212, 1092, 0, 18212, 1092, 0, 18212, 1092, 0, 18216, 1092, 0, 18216, 1092, 0, 18216, 1092, 0, 18220, 1092, 0, 18220, 1092, 0, 18220, 1092, 0, 19840, 8, 0, 23040, 2048, 0, 28736, 256, 0, 28736, 1, 0, 5440, 1, 0, 5444, 1, 0, 5456, 1, 0, 5460, 1, 0, 6784, 272, 0, 6784, 272, 0, 6800, 272, 0, 6800, 272, 0, 7808, 17, 0, 7808, 17, 0, 14724, 17408, 0, 14724, 17408, 0, 14728, 17408, 0, 14728, 17408, 0, 14732, 17408, 0, 14732, 17408, 0, 14740, 17408, 0, 14740, 17408, 0, 14744, 17408, 0, 14744, 17408, 0, 14748, 17408, 0, 14748, 17408, 0, 14756, 17408, 0, 14756, 17408, 0, 14760, 17408, 0, 14760, 17408, 0, 14764, 17408, 0, 14764, 17408, 0, 18180, 1092, 0, 18180, 1092, 0, 18180, 1092, 0, 18184, 1092, 0, 18184, 1092, 0, 18184, 1092, 0, 18188, 1092, 0, 18188, 1092, 0, 18188, 1092, 0, 18196, 1092, 0, 18196, 1092, 0, 18196, 1092, 0, 18200, 1092, 0, 18200, 1092, 0, 18200, 1092, 0, 18204, 1092, 0, 18204, 1092, 0, 18204, 1092, 0, 18212, 1092, 0, 18212, 1092, 0, 18212, 1092, 0, 18216, 1092, 0, 18216, 1092, 0, 18216, 1092, 0, 18220, 1092, 0, 18220, 
1092, 0, 18220, 1092, 0, 19840, 8, 0, 23040, 2048, 0, 28736, 256, 0, 28736, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577283636787462_395_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577283636787462_395_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d916585d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577283636787462_395_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,412 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || 
(WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 11)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 
0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + 
if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((251 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((260 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((306 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + if 
((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((316 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((327 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((338 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((349 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (358 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (372 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((389 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (396 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6912, 4369, 0, 6912, 4369, 0, 6912, 4369, 0, 6912, 4369, 0, 9856, 32, 0, 13824, 17476, 0, 13824, 17476, 0, 13824, 17476, 0, 13824, 17476, 0, 17792, 34952, 0, 17792, 34952, 0, 17792, 34952, 0, 17792, 34952, 0, 20928, 33288, 0, 20928, 33288, 0, 20928, 33288, 0, 20944, 33288, 0, 20944, 33288, 0, 20944, 33288, 0, 20960, 33288, 0, 20960, 33288, 0, 20960, 33288, 0, 21632, 33288, 0, 21632, 33288, 0, 21632, 33288, 0, 21648, 33288, 0, 21648, 33288, 0, 21648, 33288, 0, 21664, 33288, 0, 21664, 33288, 0, 21664, 33288, 0, 22912, 5201, 0, 22912, 5201, 0, 22912, 5201, 0, 22912, 5201, 0, 22912, 5201, 0, 23808, 16644, 0, 23808, 16644, 0, 23808, 16644, 0, 24896, 16384, 0, 24912, 16384, 0, 6912, 4369, 0, 6912, 4369, 0, 6912, 4369, 0, 6912, 4369, 0, 9856, 32, 0, 13824, 17476, 0, 13824, 17476, 0, 13824, 17476, 0, 13824, 17476, 0, 17792, 34952, 0, 17792, 34952, 0, 
17792, 34952, 0, 17792, 34952, 0, 20928, 33288, 0, 20928, 33288, 0, 20928, 33288, 0, 20944, 33288, 0, 20944, 33288, 0, 20944, 33288, 0, 20960, 33288, 0, 20960, 33288, 0, 20960, 33288, 0, 21632, 33288, 0, 21632, 33288, 0, 21632, 33288, 0, 21648, 33288, 0, 21648, 33288, 0, 21648, 33288, 0, 21664, 33288, 0, 21664, 33288, 0, 21664, 33288, 0, 22912, 5201, 0, 22912, 5201, 0, 22912, 5201, 0, 22912, 5201, 0, 22912, 5201, 0, 23808, 16644, 0, 23808, 16644, 0, 23808, 16644, 0, 24896, 16384, 0, 24912, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577287578669458_396_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577287578669458_396_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..27924b36 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577287578669458_396_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,157 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1792, 64, 0, 2368, 1024, 0, 3008, 4, 0, 4736, 34952, 0, 4736, 34952, 0, 4736, 34952, 0, 4736, 34952, 0, 576, 
17, 0, 576, 17, 0, 1792, 64, 0, 2368, 1024, 0, 3008, 4, 0, 4736, 34952, 0, 4736, 34952, 0, 4736, 34952, 0, 4736, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577309260131226_398_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577309260131226_398_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6ec6e8e1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577309260131226_398_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,130 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 
10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: 
UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577309361304040_399_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577309361304040_399_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b11bd505 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577309361304040_399_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,521 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 4) || 
(WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((111 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((118 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } 
+ } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((289 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((299 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((306 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((346 << 6) | (counter4 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((358 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((373 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (382 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (387 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (394 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (404 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (414 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (440 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (455 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (474 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (481 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 4992, 9216, 0, 4992, 9216, 0, 6160, 8194, 0, 6160, 8194, 0, 6176, 8194, 0, 6176, 8194, 0, 8272, 8194, 0, 8272, 8194, 0, 8288, 8194, 0, 8288, 8194, 0, 9600, 16644, 0, 9600, 16644, 0, 9600, 16644, 0, 12800, 260, 0, 12800, 260, 0, 14592, 16384, 0, 14592, 260, 0, 14592, 260, 0, 15680, 2048, 0, 19584, 4096, 0, 19600, 4096, 0, 19616, 4096, 0, 20352, 16, 0, 23888, 256, 0, 23904, 256, 0, 24768, 17476, 0, 24768, 17476, 0, 24768, 17476, 0, 24768, 17476, 0, 25216, 34952, 0, 25216, 34952, 0, 25216, 34952, 0, 25216, 34952, 0, 30784, 2, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 4992, 9216, 0, 4992, 9216, 0, 6160, 8194, 0, 6160, 8194, 0, 6176, 8194, 0, 6176, 8194, 0, 8272, 8194, 0, 8272, 8194, 0, 8288, 8194, 0, 8288, 8194, 0, 9600, 16644, 0, 9600, 16644, 0, 9600, 16644, 0, 12800, 260, 0, 12800, 260, 0, 14592, 16384, 0, 14592, 260, 0, 14592, 260, 0, 15680, 2048, 0, 19584, 4096, 0, 19600, 4096, 0, 19616, 4096, 0, 20352, 16, 0, 23888, 256, 0, 23904, 256, 0, 24768, 17476, 0, 24768, 17476, 0, 24768, 17476, 0, 24768, 17476, 0, 25216, 34952, 0, 25216, 34952, 0, 25216, 34952, 0, 25216, 34952, 0, 30784, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577321611000869_400_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577321611000869_400_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c8edb19d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577321611000869_400_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,192 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((91 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((101 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (((108 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((117 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((126 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: 
[2112, 57344, 0, 2112, 57344, 0, 2112, 57344, 0, 1472, 2730, 0, 1472, 2730, 0, 1472, 2730, 0, 1472, 2730, 0, 1472, 2730, 0, 1472, 2730, 0, 1216, 4, 0, 7488, 64, 0, 7492, 64, 0, 7504, 64, 0, 7508, 64, 0, 10112, 4096, 0, 2112, 57344, 0, 2112, 57344, 0, 2112, 57344, 0, 1472, 2730, 0, 1472, 2730, 0, 1472, 2730, 0, 1472, 2730, 0, 1472, 2730, 0, 1472, 2730, 0, 1216, 4, 0, 7488, 64, 0, 7492, 64, 0, 7504, 64, 0, 7508, 64, 0, 10112, 4096, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577321934779724_401_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577321934779724_401_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1287030e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577321934779724_401_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,359 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((32 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 
8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 15)) { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((231 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((255 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((262 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 264 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 4096, 0, 1104, 4096, 0, 1120, 4096, 0, 4992, 64, 0, 5568, 1024, 0, 6336, 36, 0, 6336, 36, 0, 7504, 32768, 0, 7520, 32768, 0, 11216, 34816, 0, 11216, 34816, 0, 11232, 34816, 0, 11232, 34816, 0, 13392, 27, 0, 13392, 27, 0, 13392, 27, 0, 13392, 27, 0, 15248, 12288, 0, 15248, 12288, 0, 15888, 3, 0, 15888, 3, 0, 16336, 3, 0, 16336, 3, 0, 16784, 46592, 0, 16784, 46592, 0, 16784, 46592, 0, 16784, 46592, 0, 16784, 46592, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 
17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 1088, 4096, 0, 1104, 4096, 0, 1120, 4096, 0, 4992, 64, 0, 5568, 1024, 0, 6336, 36, 0, 6336, 36, 0, 7504, 32768, 0, 7520, 32768, 0, 11216, 34816, 0, 11216, 34816, 0, 11232, 34816, 0, 11232, 34816, 0, 13392, 27, 0, 13392, 27, 0, 13392, 27, 0, 13392, 27, 0, 15248, 12288, 0, 15248, 12288, 0, 15888, 3, 0, 15888, 3, 0, 16336, 3, 0, 16336, 3, 0, 16784, 46592, 0, 16784, 46592, 0, 16784, 46592, 0, 16784, 46592, 0, 16784, 46592, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0, 17280, 65535, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577396148235111_403_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577396148235111_403_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bd39c24c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577396148235111_403_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,287 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + 
case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((225 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 16, 0, 912, 16, 0, 1536, 7, 0, 1536, 7, 0, 1536, 7, 0, 2624, 1, 0, 3520, 4, 0, 9216, 36865, 0, 9216, 36865, 0, 9216, 36865, 0, 10816, 36865, 0, 10816, 36865, 0, 10816, 36865, 0, 11968, 8192, 0, 15552, 8192, 0, 15872, 18724, 0, 15872, 18724, 0, 15872, 18724, 0, 15872, 18724, 0, 15872, 18724, 0, 896, 16, 0, 912, 16, 0, 1536, 7, 0, 1536, 7, 0, 1536, 7, 0, 2624, 1, 0, 3520, 4, 0, 9216, 36865, 0, 9216, 36865, 0, 9216, 36865, 0, 10816, 36865, 0, 10816, 36865, 0, 10816, 36865, 0, 11968, 8192, 0, 15552, 8192, 0, 15872, 18724, 0, 15872, 18724, 0, 15872, 18724, 0, 15872, 18724, 0, 15872, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577396682994395_404_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577396682994395_404_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c1b55c43 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577396682994395_404_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,159 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((88 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2112, 85, 0, 2112, 85, 0, 2112, 85, 0, 2112, 85, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2112, 85, 0, 2112, 85, 0, 2112, 85, 0, 2112, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577396899819797_405_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577396899819797_405_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a6ab9126 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577396899819797_405_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,164 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 8, 0, 2112, 2080, 0, 2112, 2080, 0, 3648, 4353, 0, 3648, 4353, 0, 3648, 4353, 0, 3392, 32768, 0, 5968, 64, 0, 5984, 64, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 8, 0, 2112, 2080, 0, 2112, 2080, 0, 3648, 4353, 0, 3648, 4353, 0, 3648, 4353, 0, 3392, 32768, 0, 5968, 64, 0, 5984, 64, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577397170358071_406_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577397170358071_406_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..66be4846 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577397170358071_406_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,325 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 11)) { + result 
= (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 11)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 
20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (307 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3072, 73, 0, 3072, 73, 0, 3072, 73, 0, 3648, 1040, 0, 3648, 1040, 0, 3968, 18724, 0, 3968, 18724, 0, 3968, 18724, 0, 3968, 18724, 0, 3968, 18724, 0, 9024, 4097, 0, 9024, 4097, 0, 11280, 8192, 0, 11296, 8192, 0, 
11856, 8192, 0, 11872, 8192, 0, 12608, 17476, 0, 12608, 17476, 0, 12608, 17476, 0, 12608, 17476, 0, 19648, 34824, 0, 19648, 34824, 0, 19648, 34824, 0, 3072, 73, 0, 3072, 73, 0, 3072, 73, 0, 3648, 1040, 0, 3648, 1040, 0, 3968, 18724, 0, 3968, 18724, 0, 3968, 18724, 0, 3968, 18724, 0, 3968, 18724, 0, 9024, 4097, 0, 9024, 4097, 0, 11280, 8192, 0, 11296, 8192, 0, 11856, 8192, 0, 11872, 8192, 0, 12608, 17476, 0, 12608, 17476, 0, 12608, 17476, 0, 12608, 17476, 0, 19648, 34824, 0, 19648, 34824, 0, 19648, 34824, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577398451728754_407_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577398451728754_407_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..09b623ca --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577398451728754_407_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,217 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 9)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 
13)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1152, 4369, 0, 
1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 2240, 4, 0, 3584, 16384, 0, 4032, 52428, 0, 4032, 52428, 0, 4032, 52428, 0, 4032, 52428, 0, 4032, 52428, 0, 4032, 52428, 0, 4032, 52428, 0, 4032, 52428, 0, 8400, 64, 0, 8416, 64, 0, 8960, 1040, 0, 8960, 1040, 0, 9280, 16644, 0, 9280, 16644, 0, 9280, 16644, 0, 576, 17, 0, 576, 17, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 2240, 4, 0, 3584, 16384, 0, 4032, 52428, 0, 4032, 52428, 0, 4032, 52428, 0, 4032, 52428, 0, 4032, 52428, 0, 4032, 52428, 0, 4032, 52428, 0, 4032, 52428, 0, 8400, 64, 0, 8416, 64, 0, 8960, 1040, 0, 8960, 1040, 0, 9280, 16644, 0, 9280, 16644, 0, 9280, 16644, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577398961335930_408_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577398961335930_408_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..662ca05a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577398961335930_408_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,260 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((98 << 6) | (i0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (i0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = 
(result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [13760, 32, 0, 14080, 17476, 0, 14080, 17476, 0, 14080, 17476, 0, 14080, 17476, 0, 14528, 34952, 0, 14528, 34952, 0, 14528, 34952, 0, 14528, 34952, 
0, 13760, 32, 0, 14080, 17476, 0, 14080, 17476, 0, 14080, 17476, 0, 14080, 17476, 0, 14528, 34952, 0, 14528, 34952, 0, 14528, 34952, 0, 14528, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577410109101783_410_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577410109101783_410_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..71aac7c8 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577410109101783_410_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,148 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3392, 128, 0, 3008, 64512, 0, 3008, 64512, 0, 3008, 64512, 0, 3008, 64512, 0, 3008, 64512, 0, 3008, 64512, 0, 2624, 272, 0, 2624, 272, 0, 2368, 2, 0, 5520, 32776, 0, 5520, 32776, 0, 5536, 32776, 0, 5536, 32776, 0, 6080, 1040, 0, 6080, 1040, 0, 6400, 18724, 0, 6400, 18724, 0, 6400, 18724, 0, 6400, 18724, 0, 6400, 18724, 0, 3392, 128, 0, 3008, 64512, 0, 3008, 64512, 0, 3008, 64512, 0, 3008, 64512, 0, 3008, 64512, 0, 3008, 64512, 0, 2624, 272, 0, 2624, 272, 0, 2368, 2, 0, 5520, 32776, 0, 5520, 32776, 0, 5536, 32776, 0, 5536, 32776, 0, 6080, 1040, 0, 6080, 1040, 0, 6400, 18724, 0, 6400, 18724, 0, 6400, 18724, 0, 6400, 18724, 0, 6400, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577410425681616_411_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577410425681616_411_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9dcb9a3e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577410425681616_411_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,174 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = 
(i1 + 1)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2752, 1, 0, 3712, 4096, 0, 3728, 4096, 0, 5696, 17476, 0, 5696, 17476, 0, 5696, 17476, 0, 5696, 17476, 0, 6144, 34952, 0, 6144, 34952, 0, 6144, 34952, 0, 6144, 34952, 0, 2752, 1, 0, 3712, 4096, 0, 3728, 4096, 0, 5696, 17476, 0, 5696, 17476, 0, 5696, 17476, 0, 5696, 17476, 0, 6144, 34952, 0, 6144, 34952, 0, 6144, 34952, 0, 6144, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577410667411402_412_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577410667411402_412_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..94fca3bb --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577410667411402_412_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,364 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 8))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((228 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((237 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((249 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((268 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((277 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads 
+Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1792, 1040, 0, 1792, 1040, 0, 10880, 63, 0, 10880, 63, 0, 10880, 63, 0, 10880, 63, 0, 10880, 63, 0, 10880, 63, 0, 10624, 65024, 0, 10624, 65024, 0, 10624, 65024, 0, 10624, 65024, 0, 10624, 65024, 0, 10624, 65024, 0, 10624, 65024, 0, 11520, 17, 0, 11520, 17, 0, 12416, 17476, 0, 12416, 17476, 0, 12416, 17476, 0, 12416, 17476, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1792, 1040, 0, 1792, 1040, 0, 10880, 63, 0, 10880, 63, 0, 10880, 63, 0, 10880, 63, 0, 10880, 63, 0, 10880, 63, 0, 10624, 65024, 0, 10624, 65024, 0, 10624, 65024, 0, 10624, 65024, 0, 10624, 65024, 0, 10624, 65024, 0, 10624, 65024, 0, 11520, 17, 0, 11520, 17, 0, 12416, 17476, 0, 12416, 17476, 0, 12416, 17476, 0, 12416, 17476, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577411439835295_413_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577411439835295_413_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fa7e2dcd --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577411439835295_413_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,206 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + 
if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 5))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = 
(counter0 + 1); + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 174 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 
4 + Data: [1984, 1024, 0, 3200, 5120, 0, 3200, 5120, 0, 4736, 512, 0, 5376, 17, 0, 5376, 17, 0, 8064, 32, 0, 9104, 32, 0, 9120, 32, 0, 10944, 26214, 0, 10944, 26214, 0, 10944, 26214, 0, 10944, 26214, 0, 10944, 26214, 0, 10944, 26214, 0, 10944, 26214, 0, 10944, 26214, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 1984, 1024, 0, 3200, 5120, 0, 3200, 5120, 0, 4736, 512, 0, 5376, 17, 0, 5376, 17, 0, 8064, 32, 0, 9104, 32, 0, 9120, 32, 0, 10944, 26214, 0, 10944, 26214, 0, 10944, 26214, 0, 10944, 26214, 0, 10944, 26214, 0, 10944, 26214, 0, 10944, 26214, 0, 10944, 26214, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0, 11392, 61166, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577411848116376_414_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577411848116376_414_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4a09bba2 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577411848116376_414_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,233 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 4))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 6))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5120, 64, 0, 5568, 2056, 0, 5568, 2056, 0, 7296, 2112, 0, 7296, 2112, 0, 13568, 4, 0, 14272, 32775, 0, 14272, 32775, 0, 14272, 32775, 0, 14272, 32775, 0, 14976, 8, 0, 18432, 1024, 0, 5120, 64, 0, 5568, 2056, 0, 5568, 2056, 0, 7296, 2112, 0, 7296, 2112, 0, 13568, 4, 0, 14272, 32775, 0, 14272, 32775, 0, 14272, 32775, 0, 14272, 32775, 0, 14976, 8, 0, 18432, 1024, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577412219421004_415_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577412219421004_415_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d6377f40 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577412219421004_415_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,342 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((267 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((274 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + 
result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (290 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 336 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6912, 7, 0, 6912, 7, 0, 6912, 7, 0, 6928, 7, 0, 6928, 7, 0, 6928, 7, 0, 6944, 7, 0, 6944, 7, 0, 6944, 7, 0, 7360, 64512, 0, 7360, 64512, 0, 7360, 64512, 0, 7360, 64512, 0, 7360, 64512, 0, 7360, 64512, 0, 7376, 64512, 0, 7376, 64512, 0, 7376, 64512, 0, 7376, 64512, 0, 7376, 64512, 0, 7376, 64512, 0, 7392, 64512, 0, 7392, 64512, 0, 7392, 64512, 0, 7392, 64512, 0, 7392, 64512, 0, 7392, 64512, 0, 8464, 4096, 0, 8480, 4096, 0, 9104, 1, 0, 9120, 1, 0, 10256, 4096, 0, 10272, 4096, 0, 11344, 4096, 0, 11360, 4096, 0, 14976, 8194, 0, 14976, 8194, 0, 16016, 8706, 0, 16016, 8706, 0, 16016, 8706, 0, 16032, 8706, 0, 16032, 8706, 0, 16032, 8706, 0, 16048, 8706, 0, 16048, 8706, 0, 16048, 8706, 0, 18240, 8194, 0, 18240, 8194, 0, 18560, 17476, 0, 18560, 17476, 0, 18560, 17476, 0, 18560, 17476, 0, 19008, 34952, 0, 19008, 34952, 0, 19008, 34952, 0, 19008, 34952, 0, 6912, 7, 0, 6912, 7, 0, 6912, 7, 0, 6928, 7, 0, 6928, 7, 0, 6928, 7, 0, 6944, 7, 0, 6944, 7, 0, 6944, 7, 0, 7360, 64512, 0, 7360, 64512, 0, 7360, 64512, 0, 7360, 64512, 0, 7360, 64512, 0, 7360, 64512, 0, 7376, 64512, 0, 7376, 64512, 0, 
7376, 64512, 0, 7376, 64512, 0, 7376, 64512, 0, 7376, 64512, 0, 7392, 64512, 0, 7392, 64512, 0, 7392, 64512, 0, 7392, 64512, 0, 7392, 64512, 0, 7392, 64512, 0, 8464, 4096, 0, 8480, 4096, 0, 9104, 1, 0, 9120, 1, 0, 10256, 4096, 0, 10272, 4096, 0, 11344, 4096, 0, 11360, 4096, 0, 14976, 8194, 0, 14976, 8194, 0, 16016, 8706, 0, 16016, 8706, 0, 16016, 8706, 0, 16032, 8706, 0, 16032, 8706, 0, 16032, 8706, 0, 16048, 8706, 0, 16048, 8706, 0, 16048, 8706, 0, 18240, 8194, 0, 18240, 8194, 0, 18560, 17476, 0, 18560, 17476, 0, 18560, 17476, 0, 18560, 17476, 0, 19008, 34952, 0, 19008, 34952, 0, 19008, 34952, 0, 19008, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577418181027266_416_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577418181027266_416_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b85754d9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577418181027266_416_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,160 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 14)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 3136, 34952, 0, 3136, 34952, 0, 
3136, 34952, 0, 3136, 34952, 0, 3776, 73, 0, 3776, 73, 0, 3776, 73, 0, 4352, 1040, 0, 4352, 1040, 0, 4672, 18724, 0, 4672, 18724, 0, 4672, 18724, 0, 4672, 18724, 0, 4672, 18724, 0, 576, 17, 0, 576, 17, 0, 3136, 34952, 0, 3136, 34952, 0, 3136, 34952, 0, 3136, 34952, 0, 3776, 73, 0, 3776, 73, 0, 3776, 73, 0, 4352, 1040, 0, 4352, 1040, 0, 4672, 18724, 0, 4672, 18724, 0, 4672, 18724, 0, 4672, 18724, 0, 4672, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577418625186257_417_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577418625186257_417_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e2007e6c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577418625186257_417_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,227 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 3584, 32, 0, 4160, 8738, 0, 4160, 8738, 0, 4160, 8738, 0, 4160, 8738, 0, 6352, 17476, 0, 6352, 17476, 0, 6352, 17476, 0, 6352, 17476, 0, 6368, 17476, 0, 6368, 17476, 0, 6368, 17476, 0, 6368, 17476, 0, 8128, 64, 0, 8144, 64, 0, 8160, 64, 0, 8704, 1024, 0, 9024, 18468, 0, 9024, 18468, 0, 9024, 18468, 0, 9024, 18468, 0, 576, 17, 0, 576, 17, 0, 3584, 32, 0, 4160, 8738, 0, 4160, 8738, 0, 4160, 8738, 0, 4160, 8738, 0, 6352, 17476, 0, 6352, 17476, 0, 6352, 17476, 0, 6352, 17476, 0, 6368, 17476, 0, 6368, 17476, 0, 6368, 17476, 0, 6368, 17476, 0, 8128, 64, 0, 8144, 64, 0, 8160, 64, 0, 8704, 1024, 0, 9024, 18468, 0, 9024, 18468, 0, 9024, 18468, 0, 9024, 18468, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + 
Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577419523265194_418_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577419523265194_418_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4a79ee72 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577419523265194_418_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,273 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if 
((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 7, 0, 576, 7, 0, 576, 7, 0, 1216, 85, 0, 1216, 85, 0, 1216, 85, 0, 1216, 85, 0, 
3456, 8704, 0, 3456, 8704, 0, 4816, 257, 0, 4816, 257, 0, 4832, 257, 0, 4832, 257, 0, 11344, 16452, 0, 11344, 16452, 0, 11344, 16452, 0, 11360, 16452, 0, 11360, 16452, 0, 11360, 16452, 0, 13184, 4, 0, 14992, 32768, 0, 15008, 32768, 0, 576, 7, 0, 576, 7, 0, 576, 7, 0, 1216, 85, 0, 1216, 85, 0, 1216, 85, 0, 1216, 85, 0, 3456, 8704, 0, 3456, 8704, 0, 4816, 257, 0, 4816, 257, 0, 4832, 257, 0, 4832, 257, 0, 11344, 16452, 0, 11344, 16452, 0, 11344, 16452, 0, 11360, 16452, 0, 11360, 16452, 0, 11360, 16452, 0, 13184, 4, 0, 14992, 32768, 0, 15008, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577464733062144_422_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577464733062144_422_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..56b786f1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577464733062144_422_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,251 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((236 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((255 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 348 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1856, 32768, 0, 2896, 32768, 0, 2912, 32768, 0, 2928, 32768, 0, 3920, 32768, 0, 3936, 32768, 0, 3952, 32768, 0, 5888, 32768, 0, 6528, 73, 0, 6528, 73, 0, 6528, 73, 0, 7104, 1040, 0, 7104, 1040, 0, 7424, 18724, 0, 
7424, 18724, 0, 7424, 18724, 0, 7424, 18724, 0, 7424, 18724, 0, 9344, 4, 0, 10960, 4, 0, 10976, 4, 0, 10992, 4, 0, 12608, 16385, 0, 12608, 16385, 0, 15104, 10304, 0, 15104, 10304, 0, 15104, 10304, 0, 15108, 10304, 0, 15108, 10304, 0, 15108, 10304, 0, 15120, 10304, 0, 15120, 10304, 0, 15120, 10304, 0, 15124, 10304, 0, 15124, 10304, 0, 15124, 10304, 0, 15136, 10304, 0, 15136, 10304, 0, 15136, 10304, 0, 15140, 10304, 0, 15140, 10304, 0, 15140, 10304, 0, 16320, 16545, 0, 16320, 16545, 0, 16320, 16545, 0, 16320, 16545, 0, 16336, 16545, 0, 16336, 16545, 0, 16336, 16545, 0, 16336, 16545, 0, 16352, 16545, 0, 16352, 16545, 0, 16352, 16545, 0, 16352, 16545, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1856, 32768, 0, 2896, 32768, 0, 2912, 32768, 0, 2928, 32768, 0, 3920, 32768, 0, 3936, 32768, 0, 3952, 32768, 0, 5888, 32768, 0, 6528, 73, 0, 6528, 73, 0, 6528, 73, 0, 7104, 1040, 0, 7104, 1040, 0, 7424, 18724, 0, 7424, 18724, 0, 7424, 18724, 0, 7424, 18724, 0, 7424, 18724, 0, 9344, 4, 0, 10960, 4, 0, 10976, 4, 0, 10992, 4, 0, 12608, 16385, 0, 12608, 16385, 0, 15104, 10304, 0, 15104, 10304, 0, 15104, 10304, 0, 15108, 10304, 0, 15108, 10304, 0, 15108, 10304, 0, 15120, 10304, 0, 15120, 10304, 0, 15120, 10304, 0, 15124, 10304, 0, 15124, 10304, 0, 15124, 10304, 0, 15136, 10304, 0, 15136, 10304, 0, 15136, 10304, 0, 15140, 10304, 0, 15140, 10304, 0, 15140, 10304, 0, 16320, 16545, 0, 16320, 16545, 0, 16320, 16545, 0, 16320, 16545, 0, 16336, 16545, 0, 16336, 16545, 0, 16336, 16545, 0, 16336, 16545, 0, 16352, 16545, 0, 16352, 16545, 0, 16352, 16545, 0, 16352, 16545, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577479079956794_424_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577479079956794_424_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..22309cf1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577479079956794_424_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,275 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 15)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((100 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((107 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((114 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((140 << 6) | (i0 << 4)) | (counter1 << 2)) | counter2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((145 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((165 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(((176 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((183 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((190 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((counter1 == 2)) { + break; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 366 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 
0, 1152, 1040, 0, 4608, 8784, 0, 4608, 8784, 0, 4608, 8784, 0, 4608, 8784, 0, 4624, 8784, 0, 4624, 8784, 0, 4624, 8784, 0, 4624, 8784, 0, 6404, 1, 0, 6408, 1, 0, 6420, 1, 0, 6424, 1, 0, 8965, 512, 0, 8966, 512, 0, 8969, 512, 0, 8970, 512, 0, 8981, 512, 0, 8982, 512, 0, 8985, 512, 0, 8986, 512, 0, 9284, 17476, 0, 9284, 17476, 0, 9284, 17476, 0, 9284, 17476, 0, 9288, 17476, 0, 9288, 17476, 0, 9288, 17476, 0, 9288, 17476, 0, 9300, 17476, 0, 9300, 17476, 0, 9300, 17476, 0, 9300, 17476, 0, 9304, 17476, 0, 9304, 17476, 0, 9304, 17476, 0, 9304, 17476, 0, 10564, 32776, 0, 10564, 32776, 0, 10568, 32776, 0, 10568, 32776, 0, 10580, 32776, 0, 10580, 32776, 0, 10584, 32776, 0, 10584, 32776, 0, 11268, 32776, 0, 11268, 32776, 0, 11272, 32776, 0, 11272, 32776, 0, 11284, 32776, 0, 11284, 32776, 0, 11288, 32776, 0, 11288, 32776, 0, 12992, 85, 0, 12992, 85, 0, 12992, 85, 0, 12992, 85, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 4608, 8784, 0, 4608, 8784, 0, 4608, 8784, 0, 4608, 8784, 0, 4624, 8784, 0, 4624, 8784, 0, 4624, 8784, 0, 4624, 8784, 0, 6404, 1, 0, 6408, 1, 0, 6420, 1, 0, 6424, 1, 0, 8965, 512, 0, 8966, 512, 0, 8969, 512, 0, 8970, 512, 0, 8981, 512, 0, 8982, 512, 0, 8985, 512, 0, 8986, 512, 0, 9284, 17476, 0, 9284, 17476, 0, 9284, 17476, 0, 9284, 17476, 0, 9288, 17476, 0, 9288, 17476, 0, 9288, 17476, 0, 9288, 17476, 0, 9300, 17476, 0, 9300, 17476, 0, 9300, 17476, 0, 9300, 17476, 0, 9304, 17476, 0, 9304, 17476, 0, 9304, 17476, 0, 9304, 17476, 0, 10564, 32776, 0, 10564, 32776, 0, 10568, 32776, 0, 10568, 32776, 0, 10580, 32776, 0, 10580, 32776, 0, 10584, 32776, 0, 10584, 32776, 0, 11268, 32776, 0, 11268, 32776, 0, 11272, 32776, 0, 11272, 32776, 0, 11284, 32776, 0, 11284, 32776, 0, 11288, 32776, 0, 11288, 32776, 0, 12992, 85, 0, 12992, 85, 0, 12992, 85, 0, 12992, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577540047873929_426_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577540047873929_426_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..81910eee --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577540047873929_426_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,433 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((33 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + if 
((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((198 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((205 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch 
((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((264 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((283 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((293 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((302 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((307 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((311 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((i6 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((329 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + break; + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (342 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (356 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (363 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (367 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (372 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (379 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 570 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1168, 4161, 0, 1168, 4161, 0, 1168, 4161, 0, 1184, 4161, 0, 1184, 4161, 0, 1184, 4161, 0, 1200, 4161, 0, 1200, 4161, 0, 1200, 4161, 0, 2128, 1, 0, 2132, 1, 0, 2144, 1, 0, 2148, 1, 0, 2160, 1, 0, 2164, 1, 0, 3600, 520, 0, 3600, 520, 0, 3604, 520, 0, 3604, 520, 0, 3616, 520, 0, 3616, 520, 0, 3620, 520, 0, 3620, 520, 0, 3632, 520, 0, 3632, 520, 0, 3636, 520, 0, 3636, 520, 0, 4560, 4096, 0, 4564, 4096, 0, 4576, 4096, 0, 4580, 4096, 0, 4592, 4096, 0, 4596, 4096, 0, 8256, 9216, 0, 8256, 9216, 0, 9936, 16, 0, 9952, 16, 0, 9968, 16, 0, 10368, 9216, 0, 10368, 9216, 0, 14848, 18724, 0, 14848, 18724, 0, 14848, 18724, 0, 14848, 18724, 0, 
14848, 18724, 0, 18112, 1, 0, 18116, 1, 0, 18128, 1, 0, 18132, 1, 0, 18752, 1, 0, 18756, 1, 0, 18768, 1, 0, 18772, 1, 0, 19328, 16, 0, 19332, 16, 0, 19344, 16, 0, 19348, 16, 0, 19648, 272, 0, 19648, 272, 0, 19652, 272, 0, 19652, 272, 0, 19664, 272, 0, 19664, 272, 0, 19668, 272, 0, 19668, 272, 0, 21888, 17, 0, 21888, 17, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 1168, 4161, 0, 1168, 4161, 0, 1168, 4161, 0, 1184, 4161, 0, 1184, 4161, 0, 1184, 4161, 0, 1200, 4161, 0, 1200, 4161, 0, 1200, 4161, 0, 2128, 1, 0, 2132, 1, 0, 2144, 1, 0, 2148, 1, 0, 2160, 1, 0, 2164, 1, 0, 3600, 520, 0, 3600, 520, 0, 3604, 520, 0, 3604, 520, 0, 3616, 520, 0, 3616, 520, 0, 3620, 520, 0, 3620, 520, 0, 3632, 520, 0, 3632, 520, 0, 3636, 520, 0, 3636, 520, 0, 4560, 4096, 0, 4564, 4096, 0, 4576, 4096, 0, 4580, 4096, 0, 4592, 4096, 0, 4596, 4096, 0, 8256, 9216, 0, 8256, 9216, 0, 9936, 16, 0, 9952, 16, 0, 9968, 16, 0, 10368, 9216, 0, 10368, 9216, 0, 14848, 18724, 0, 14848, 18724, 0, 14848, 18724, 0, 14848, 18724, 0, 14848, 18724, 0, 18112, 1, 0, 18116, 1, 0, 18128, 1, 0, 18132, 1, 0, 18752, 1, 0, 18756, 1, 0, 18768, 1, 0, 18772, 1, 0, 19328, 16, 0, 19332, 16, 0, 19344, 16, 0, 19348, 16, 0, 19648, 272, 0, 19648, 272, 0, 19652, 272, 0, 19652, 272, 0, 19664, 272, 0, 19664, 272, 0, 19668, 272, 0, 19668, 272, 0, 21888, 17, 0, 21888, 17, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 23808, 30583, 0, 24256, 65535, 0, 24256, 65535, 0, 
24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0, 24256, 65535, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577586178851384_429_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577586178851384_429_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b17a3bbd --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577586178851384_429_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,158 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for 
(uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1168, 21845, 0, 1168, 21845, 0, 1168, 21845, 0, 1168, 21845, 0, 1168, 21845, 0, 1168, 21845, 0, 1168, 21845, 0, 1168, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 2896, 1025, 0, 2896, 1025, 0, 2900, 1025, 0, 2900, 1025, 0, 2912, 1025, 0, 2912, 1025, 0, 2916, 1025, 0, 2916, 1025, 0, 3520, 73, 0, 3520, 73, 0, 3520, 73, 0, 4416, 8322, 0, 4416, 8322, 0, 4416, 8322, 0, 6592, 2, 0, 7872, 18724, 0, 7872, 18724, 0, 7872, 18724, 0, 7872, 18724, 0, 7872, 18724, 0, 1168, 21845, 0, 1168, 21845, 0, 1168, 21845, 0, 1168, 21845, 0, 1168, 21845, 0, 1168, 21845, 0, 1168, 21845, 0, 1168, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 2896, 1025, 0, 2896, 1025, 0, 2900, 1025, 0, 2900, 1025, 0, 2912, 1025, 0, 2912, 1025, 0, 2916, 1025, 0, 2916, 
1025, 0, 3520, 73, 0, 3520, 73, 0, 3520, 73, 0, 4416, 8322, 0, 4416, 8322, 0, 4416, 8322, 0, 6592, 2, 0, 7872, 18724, 0, 7872, 18724, 0, 7872, 18724, 0, 7872, 18724, 0, 7872, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577587270229524_430_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577587270229524_430_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f5cb7c74 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577587270229524_430_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1728, 24612, 0, 1728, 24612, 0, 1728, 24612, 0, 1728, 24612, 0, 1344, 91, 0, 1344, 91, 0, 1344, 91, 0, 1344, 91, 0, 1344, 91, 0, 1728, 24612, 0, 1728, 24612, 0, 1728, 24612, 0, 1728, 24612, 0, 1344, 91, 0, 1344, 91, 0, 1344, 91, 0, 1344, 91, 0, 1344, 91, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577587368689846_431_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577587368689846_431_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8abd8680 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577587368689846_431_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,187 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 0))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 512, 0, 2496, 1, 0, 3072, 1, 0, 9856, 21845, 0, 9856, 21845, 0, 9856, 21845, 0, 9856, 21845, 0, 9856, 21845, 0, 9856, 21845, 0, 9856, 21845, 0, 9856, 21845, 0, 9472, 514, 0, 9472, 514, 0, 1856, 512, 0, 2496, 1, 0, 3072, 1, 0, 9856, 21845, 0, 9856, 21845, 0, 9856, 21845, 0, 9856, 21845, 0, 9856, 21845, 0, 9856, 21845, 0, 9856, 21845, 0, 9856, 21845, 0, 9472, 514, 0, 9472, 514, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + 
Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577587516510711_432_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577587516510711_432_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d201f4f3 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577587516510711_432_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,201 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + 
switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((92 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || 
(WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } 
+} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 276 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3664, 64, 0, 3680, 64, 0, 5072, 21, 0, 5072, 21, 0, 5072, 21, 0, 5076, 21, 0, 5076, 21, 0, 5076, 21, 0, 5080, 21, 0, 5080, 21, 0, 5080, 21, 0, 5088, 21, 0, 5088, 21, 0, 5088, 21, 0, 5092, 21, 0, 5092, 21, 0, 5092, 21, 0, 5096, 21, 0, 5096, 21, 0, 5096, 21, 0, 5904, 5, 0, 5904, 5, 0, 5908, 5, 0, 5908, 5, 0, 5912, 5, 0, 5912, 5, 0, 5920, 5, 0, 5920, 5, 0, 5924, 5, 0, 5924, 5, 0, 5928, 5, 0, 5928, 5, 0, 9744, 84, 0, 9744, 84, 0, 9744, 84, 0, 9760, 84, 0, 9760, 84, 0, 9760, 84, 0, 12352, 2080, 0, 12352, 2080, 0, 12368, 2080, 0, 12368, 2080, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3664, 64, 0, 3680, 64, 0, 5072, 21, 0, 5072, 21, 0, 5072, 21, 0, 5076, 21, 0, 5076, 21, 0, 5076, 21, 0, 5080, 21, 0, 5080, 21, 0, 5080, 21, 0, 5088, 21, 0, 5088, 21, 0, 5088, 21, 0, 5092, 21, 0, 5092, 21, 0, 5092, 21, 0, 5096, 21, 0, 5096, 21, 0, 5096, 21, 0, 5904, 5, 0, 5904, 5, 0, 5908, 5, 0, 5908, 5, 0, 5912, 5, 0, 5912, 5, 0, 5920, 5, 0, 5920, 5, 0, 5924, 5, 0, 5924, 5, 0, 5928, 5, 0, 5928, 5, 0, 9744, 84, 0, 9744, 84, 0, 9744, 84, 0, 9760, 84, 0, 9760, 84, 0, 9760, 84, 0, 12352, 2080, 0, 12352, 2080, 0, 12368, 2080, 0, 12368, 2080, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577590477759860_433_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577590477759860_433_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..48519559 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577590477759860_433_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 128, 0, 832, 32799, 0, 832, 32799, 0, 832, 32799, 0, 832, 32799, 0, 832, 32799, 0, 832, 32799, 0, 1088, 128, 0, 832, 32799, 0, 832, 32799, 0, 832, 32799, 0, 832, 32799, 0, 832, 32799, 0, 832, 32799, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - 
Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577590581009727_434_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577590581009727_434_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b24b2a63 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577590581009727_434_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,189 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i0 == 2)) { + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 360 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2176, 8322, 0, 2176, 8322, 
0, 2176, 8322, 0, 2192, 8322, 0, 2192, 8322, 0, 2192, 8322, 0, 2208, 8322, 0, 2208, 8322, 0, 2208, 8322, 0, 3520, 32778, 0, 3520, 32778, 0, 3520, 32778, 0, 3524, 32778, 0, 3524, 32778, 0, 3524, 32778, 0, 3536, 32778, 0, 3536, 32778, 0, 3536, 32778, 0, 3540, 32778, 0, 3540, 32778, 0, 3540, 32778, 0, 3552, 32778, 0, 3552, 32778, 0, 3552, 32778, 0, 3556, 32778, 0, 3556, 32778, 0, 3556, 32778, 0, 4160, 10, 0, 4160, 10, 0, 4164, 10, 0, 4164, 10, 0, 4176, 10, 0, 4176, 10, 0, 4180, 10, 0, 4180, 10, 0, 4192, 10, 0, 4192, 10, 0, 4196, 10, 0, 4196, 10, 0, 5504, 8704, 0, 5504, 8704, 0, 5508, 8704, 0, 5508, 8704, 0, 5520, 8704, 0, 5520, 8704, 0, 5524, 8704, 0, 5524, 8704, 0, 5536, 8704, 0, 5536, 8704, 0, 5540, 8704, 0, 5540, 8704, 0, 7424, 21, 0, 7424, 21, 0, 7424, 21, 0, 10752, 5, 0, 10752, 5, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2176, 8322, 0, 2176, 8322, 0, 2176, 8322, 0, 2192, 8322, 0, 2192, 8322, 0, 2192, 8322, 0, 2208, 8322, 0, 2208, 8322, 0, 2208, 8322, 0, 3520, 32778, 0, 3520, 32778, 0, 3520, 32778, 0, 3524, 32778, 0, 3524, 32778, 0, 3524, 32778, 0, 3536, 32778, 0, 3536, 32778, 0, 3536, 32778, 0, 3540, 32778, 0, 3540, 32778, 0, 3540, 32778, 0, 3552, 32778, 0, 3552, 32778, 0, 3552, 32778, 0, 3556, 32778, 0, 3556, 32778, 0, 3556, 32778, 0, 4160, 10, 0, 4160, 10, 0, 4164, 10, 0, 4164, 10, 0, 4176, 10, 0, 4176, 10, 0, 4180, 10, 0, 4180, 10, 0, 4192, 10, 0, 4192, 10, 0, 4196, 10, 0, 4196, 10, 0, 5504, 8704, 0, 5504, 8704, 0, 5508, 8704, 0, 5508, 8704, 0, 5520, 8704, 0, 5520, 8704, 0, 5524, 8704, 0, 5524, 8704, 0, 5536, 8704, 0, 5536, 8704, 0, 5540, 8704, 0, 5540, 8704, 0, 7424, 21, 0, 7424, 21, 0, 7424, 21, 0, 10752, 5, 0, 10752, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577595836525964_435_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577595836525964_435_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..102113b6 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577595836525964_435_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,201 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((91 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((157 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 
1488, 63488, 0, 1488, 63488, 0, 1488, 63488, 0, 1488, 63488, 0, 1488, 63488, 0, 3152, 16448, 0, 3152, 16448, 0, 3600, 65280, 0, 3600, 65280, 0, 3600, 65280, 0, 3600, 65280, 0, 3600, 65280, 0, 3600, 65280, 0, 3600, 65280, 0, 3600, 65280, 0, 7296, 57344, 0, 7296, 57344, 0, 7296, 57344, 0, 9104, 1, 0, 9120, 1, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1488, 63488, 0, 1488, 63488, 0, 1488, 63488, 0, 1488, 63488, 0, 1488, 63488, 0, 3152, 16448, 0, 3152, 16448, 0, 3600, 65280, 0, 3600, 65280, 0, 3600, 65280, 0, 3600, 65280, 0, 3600, 65280, 0, 3600, 65280, 0, 3600, 65280, 0, 3600, 65280, 0, 7296, 57344, 0, 7296, 57344, 0, 7296, 57344, 0, 9104, 1, 0, 9120, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577636764826713_438_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577636764826713_438_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..aa9957c0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577636764826713_438_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,157 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 4))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((125 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2816, 4164, 0, 2816, 4164, 0, 2816, 4164, 0, 3776, 8704, 0, 3776, 8704, 0, 2816, 4164, 0, 2816, 4164, 0, 2816, 4164, 0, 3776, 8704, 0, 3776, 8704, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577636934401623_439_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577636934401623_439_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7dbf9182 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577636934401623_439_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,152 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((80 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((95 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((102 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (counter0 << 4)) | (i1 << 
2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 426 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3712, 16513, 0, 3712, 16513, 0, 3712, 16513, 0, 3200, 32768, 0, 2944, 8, 0, 2560, 12290, 0, 2560, 12290, 0, 2560, 12290, 0, 5136, 7, 0, 5136, 7, 0, 5136, 7, 0, 5140, 7, 0, 5140, 7, 0, 5140, 7, 0, 5144, 7, 0, 5144, 7, 0, 5144, 7, 0, 5152, 7, 0, 5152, 7, 0, 5152, 7, 0, 5156, 7, 0, 5156, 7, 0, 5156, 7, 0, 5160, 7, 0, 5160, 7, 0, 5160, 7, 0, 5168, 7, 0, 5168, 7, 0, 5168, 7, 0, 5172, 7, 0, 5172, 7, 0, 5172, 7, 0, 5176, 7, 0, 5176, 7, 0, 5176, 7, 0, 6096, 2, 0, 6100, 2, 0, 6104, 2, 0, 6112, 2, 0, 6116, 2, 0, 6120, 2, 0, 6128, 2, 0, 6132, 2, 0, 6136, 2, 0, 6544, 10, 0, 6544, 10, 0, 6548, 10, 0, 6548, 10, 0, 6552, 10, 0, 6552, 10, 0, 6560, 10, 0, 6560, 10, 0, 6564, 10, 0, 6564, 10, 0, 6568, 10, 0, 6568, 10, 0, 6576, 10, 0, 6576, 10, 0, 6580, 10, 0, 6580, 10, 0, 6584, 10, 0, 6584, 10, 0, 6992, 1, 0, 6996, 1, 0, 7000, 1, 0, 7008, 1, 0, 7012, 1, 0, 7016, 1, 0, 7024, 1, 0, 7028, 1, 0, 7032, 1, 0, 3712, 16513, 0, 3712, 16513, 0, 3712, 16513, 0, 3200, 32768, 0, 2944, 8, 0, 2560, 12290, 0, 2560, 12290, 0, 2560, 12290, 0, 5136, 7, 0, 5136, 7, 0, 5136, 7, 0, 5140, 7, 0, 5140, 7, 0, 5140, 7, 0, 5144, 7, 0, 5144, 7, 0, 5144, 7, 0, 5152, 7, 0, 5152, 7, 0, 5152, 7, 0, 5156, 7, 0, 5156, 7, 0, 5156, 7, 0, 5160, 7, 0, 5160, 7, 0, 5160, 7, 0, 5168, 7, 0, 5168, 7, 0, 5168, 7, 0, 5172, 7, 0, 5172, 7, 0, 5172, 7, 0, 5176, 7, 0, 5176, 7, 0, 5176, 7, 0, 6096, 2, 0, 6100, 2, 0, 6104, 2, 0, 6112, 2, 0, 6116, 2, 0, 6120, 2, 0, 6128, 2, 0, 6132, 2, 0, 6136, 2, 0, 6544, 10, 0, 6544, 10, 0, 6548, 10, 0, 6548, 10, 0, 6552, 10, 0, 6552, 10, 0, 6560, 10, 0, 6560, 10, 0, 6564, 10, 
0, 6564, 10, 0, 6568, 10, 0, 6568, 10, 0, 6576, 10, 0, 6576, 10, 0, 6580, 10, 0, 6580, 10, 0, 6584, 10, 0, 6584, 10, 0, 6992, 1, 0, 6996, 1, 0, 7000, 1, 0, 7008, 1, 0, 7012, 1, 0, 7016, 1, 0, 7024, 1, 0, 7028, 1, 0, 7032, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577657326028619_440_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577657326028619_440_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b85d8faa --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577657326028619_440_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,102 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 
13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 2432, 1, 0, 2448, 1, 0, 2464, 1, 0, 4352, 4096, 0, 4368, 4096, 0, 4384, 4096, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 2432, 1, 0, 2448, 1, 0, 2464, 1, 0, 4352, 4096, 0, 4368, 4096, 0, 4384, 4096, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577705611004534_442_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577705611004534_442_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cf2e3f95 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577705611004534_442_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,183 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if 
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - 
Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [8000, 43682, 0, 8000, 43682, 0, 8000, 43682, 0, 8000, 43682, 0, 8000, 43682, 0, 8000, 43682, 0, 8000, 43682, 0, 8640, 1, 0, 11136, 1, 0, 8000, 43682, 0, 8000, 43682, 0, 8000, 43682, 0, 8000, 43682, 0, 8000, 43682, 0, 8000, 43682, 0, 8000, 43682, 0, 8640, 1, 0, 11136, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577705757133388_443_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577705757133388_443_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cbc97088 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577705757133388_443_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,240 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((154 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 2))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 318 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 
1152, 5201, 0, 1152, 5201, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 2112, 17, 0, 2112, 17, 0, 3008, 17476, 0, 3008, 17476, 0, 3008, 17476, 0, 3008, 17476, 0, 7232, 34952, 0, 7232, 34952, 0, 7232, 34952, 0, 7232, 34952, 0, 7248, 34952, 0, 7248, 34952, 0, 7248, 34952, 0, 7248, 34952, 0, 7264, 34952, 0, 7264, 34952, 0, 7264, 34952, 0, 7264, 34952, 0, 7872, 73, 0, 7872, 73, 0, 7872, 73, 0, 8448, 1040, 0, 8448, 1040, 0, 9872, 32, 0, 9888, 32, 0, 13008, 256, 0, 13024, 256, 0, 14224, 2048, 0, 14240, 2048, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 2112, 17, 0, 2112, 17, 0, 3008, 17476, 0, 3008, 17476, 0, 3008, 17476, 0, 3008, 17476, 0, 7232, 34952, 0, 7232, 34952, 0, 7232, 34952, 0, 7232, 34952, 0, 7248, 34952, 0, 7248, 34952, 0, 7248, 34952, 0, 7248, 34952, 0, 7264, 34952, 0, 7264, 34952, 0, 7264, 34952, 0, 7264, 34952, 0, 7872, 73, 0, 7872, 73, 0, 7872, 73, 0, 8448, 1040, 0, 8448, 1040, 0, 9872, 32, 0, 9888, 32, 0, 13008, 256, 0, 13024, 256, 0, 14224, 2048, 0, 14240, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + 
Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577708249242939_444_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577708249242939_444_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0522e996 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577708249242939_444_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,419 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch 
((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(((252 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((266 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((334 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((341 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((350 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (365 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (384 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + 
default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (388 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 240 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1536, 4096, 0, 1552, 4096, 0, 1568, 4096, 0, 4864, 8210, 0, 4864, 8210, 0, 4864, 8210, 0, 6336, 16384, 0, 8960, 4100, 0, 8960, 4100, 0, 8976, 4100, 0, 8976, 4100, 0, 8992, 4100, 0, 8992, 4100, 0, 11072, 4, 0, 11088, 4, 0, 11104, 4, 0, 12416, 43018, 0, 12416, 43018, 0, 12416, 43018, 0, 12416, 43018, 0, 12416, 43018, 0, 13056, 8, 0, 13952, 2048, 0, 18368, 85, 0, 18368, 85, 0, 18368, 85, 0, 18368, 85, 0, 20288, 4, 0, 22416, 68, 0, 22416, 68, 0, 22432, 68, 0, 22432, 68, 0, 22448, 68, 0, 22448, 68, 0, 23360, 4226, 0, 23360, 4226, 0, 23360, 4226, 0, 24576, 1042, 0, 24576, 1042, 0, 24576, 1042, 0, 1536, 4096, 0, 1552, 4096, 0, 1568, 4096, 0, 4864, 8210, 0, 4864, 8210, 0, 4864, 8210, 0, 6336, 16384, 0, 8960, 4100, 0, 8960, 4100, 0, 8976, 4100, 0, 8976, 4100, 0, 8992, 4100, 0, 8992, 4100, 0, 11072, 4, 0, 11088, 4, 0, 11104, 4, 0, 12416, 43018, 0, 12416, 43018, 0, 12416, 43018, 0, 12416, 43018, 0, 12416, 43018, 0, 13056, 8, 0, 13952, 2048, 0, 18368, 85, 0, 18368, 85, 0, 18368, 85, 0, 18368, 85, 0, 20288, 4, 0, 22416, 68, 0, 22416, 68, 0, 22432, 68, 0, 22432, 68, 0, 22448, 68, 0, 22448, 68, 0, 23360, 4226, 0, 23360, 4226, 0, 23360, 4226, 0, 24576, 1042, 0, 24576, 1042, 0, 24576, 1042, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577729539776131_445_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577729539776131_445_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0e0c00be --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577729539776131_445_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,239 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 15)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + 
if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((100 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 228 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1536, 32768, 0, 1552, 32768, 0, 2240, 32768, 0, 2256, 32768, 0, 3648, 1040, 0, 3648, 1040, 0, 3968, 18724, 0, 3968, 18724, 0, 3968, 18724, 0, 3968, 18724, 0, 3968, 18724, 0, 5456, 4161, 0, 5456, 4161, 0, 5456, 4161, 0, 5472, 4161, 0, 5472, 4161, 0, 5472, 4161, 0, 6992, 4161, 0, 6992, 4161, 0, 6992, 4161, 0, 7008, 4161, 0, 7008, 4161, 0, 7008, 4161, 0, 9792, 8322, 0, 9792, 8322, 0, 9792, 8322, 0, 9808, 8322, 0, 9808, 8322, 0, 9808, 8322, 0, 10368, 1040, 0, 10368, 1040, 0, 10384, 1040, 0, 10384, 1040, 0, 11456, 18724, 0, 11456, 18724, 0, 11456, 18724, 0, 11456, 18724, 0, 11456, 18724, 0, 1536, 32768, 0, 1552, 32768, 0, 2240, 32768, 0, 2256, 32768, 0, 3648, 1040, 0, 3648, 1040, 0, 3968, 18724, 0, 3968, 18724, 0, 3968, 18724, 0, 3968, 18724, 0, 3968, 18724, 0, 5456, 4161, 0, 5456, 4161, 0, 5456, 4161, 0, 5472, 4161, 0, 5472, 4161, 0, 5472, 4161, 0, 6992, 4161, 0, 6992, 4161, 0, 6992, 4161, 0, 7008, 4161, 0, 7008, 4161, 0, 7008, 4161, 0, 9792, 8322, 0, 9792, 8322, 0, 9792, 8322, 0, 9808, 8322, 0, 
9808, 8322, 0, 9808, 8322, 0, 10368, 1040, 0, 10368, 1040, 0, 10384, 1040, 0, 10384, 1040, 0, 11456, 18724, 0, 11456, 18724, 0, 11456, 18724, 0, 11456, 18724, 0, 11456, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577733344409942_446_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577733344409942_446_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..437351b1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577733344409942_446_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,230 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || 
(WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { 
+ result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 7360, 4, 0, 8576, 85, 0, 8576, 85, 0, 8576, 85, 0, 8576, 85, 0, 9728, 61447, 0, 9728, 61447, 0, 9728, 61447, 0, 9728, 61447, 0, 9728, 61447, 0, 9728, 61447, 0, 9728, 61447, 0, 10368, 1, 0, 11264, 16388, 0, 11264, 16388, 0, 11712, 32768, 0, 768, 65, 0, 768, 65, 0, 7360, 4, 0, 8576, 85, 0, 8576, 85, 0, 8576, 85, 0, 8576, 85, 0, 9728, 61447, 0, 9728, 61447, 0, 9728, 61447, 0, 9728, 61447, 0, 9728, 61447, 0, 9728, 61447, 0, 9728, 61447, 0, 10368, 1, 0, 11264, 16388, 0, 11264, 16388, 0, 11712, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + 
Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577733727831836_447_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577733727831836_447_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a1bf9a6b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577733727831836_447_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,133 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() 
< 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2112, 73, 0, 2112, 73, 0, 2112, 73, 0, 2688, 1040, 0, 2688, 1040, 0, 3008, 18724, 0, 3008, 18724, 0, 3008, 18724, 0, 3008, 18724, 0, 3008, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2112, 73, 0, 2112, 73, 0, 2112, 73, 0, 2688, 1040, 0, 2688, 1040, 0, 3008, 18724, 0, 3008, 18724, 0, 3008, 18724, 0, 3008, 18724, 0, 3008, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577755575519474_451_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577755575519474_451_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d0f36fab --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577755575519474_451_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,147 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 2, 0, 6464, 8321, 0, 6464, 8321, 0, 6464, 8321, 0, 6080, 49152, 0, 6080, 49152, 0, 5696, 7680, 0, 5696, 7680, 0, 5696, 7680, 0, 5696, 7680, 0, 1856, 2, 0, 6464, 8321, 0, 6464, 8321, 0, 6464, 8321, 0, 6080, 49152, 0, 6080, 49152, 0, 5696, 7680, 0, 5696, 7680, 0, 5696, 7680, 0, 5696, 7680, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577764591826063_454_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577764591826063_454_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8b93056e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577764591826063_454_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,214 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 2) || 
(WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((142 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((counter2 == 2)) { + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 4369, 0, 1216, 4369, 0, 1216, 4369, 0, 
1216, 4369, 0, 1232, 4369, 0, 1232, 4369, 0, 1232, 4369, 0, 1232, 4369, 0, 1248, 4369, 0, 1248, 4369, 0, 1248, 4369, 0, 1248, 4369, 0, 2384, 2, 0, 2400, 2, 0, 2416, 2, 0, 11136, 34952, 0, 11136, 34952, 0, 11136, 34952, 0, 11136, 34952, 0, 1216, 4369, 0, 1216, 4369, 0, 1216, 4369, 0, 1216, 4369, 0, 1232, 4369, 0, 1232, 4369, 0, 1232, 4369, 0, 1232, 4369, 0, 1248, 4369, 0, 1248, 4369, 0, 1248, 4369, 0, 1248, 4369, 0, 2384, 2, 0, 2400, 2, 0, 2416, 2, 0, 11136, 34952, 0, 11136, 34952, 0, 11136, 34952, 0, 11136, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577765015200356_455_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577765015200356_455_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5786fae4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577765015200356_455_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,120 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 
<< 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2512, 584, 0, 2512, 584, 0, 2512, 584, 0, 2528, 584, 0, 2528, 584, 0, 2528, 584, 0, 2544, 584, 0, 2544, 584, 0, 2544, 584, 0, 3924, 130, 0, 3924, 130, 0, 3928, 130, 0, 3928, 130, 0, 3940, 130, 0, 3940, 130, 0, 3944, 130, 0, 3944, 130, 0, 3956, 130, 0, 3956, 130, 0, 3960, 130, 0, 3960, 130, 0, 5264, 24, 0, 5264, 24, 0, 5280, 24, 0, 5280, 24, 0, 5296, 24, 0, 5296, 24, 0, 5568, 18724, 0, 5568, 18724, 0, 5568, 18724, 0, 5568, 18724, 0, 5568, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 2512, 584, 0, 2512, 584, 0, 2512, 584, 0, 2528, 584, 0, 2528, 584, 0, 2528, 584, 0, 2544, 584, 0, 2544, 584, 0, 2544, 584, 0, 3924, 130, 0, 3924, 130, 0, 3928, 130, 0, 3928, 130, 0, 3940, 130, 0, 3940, 130, 0, 3944, 130, 0, 3944, 130, 0, 
3956, 130, 0, 3956, 130, 0, 3960, 130, 0, 3960, 130, 0, 5264, 24, 0, 5264, 24, 0, 5280, 24, 0, 5280, 24, 0, 5296, 24, 0, 5296, 24, 0, 5568, 18724, 0, 5568, 18724, 0, 5568, 18724, 0, 5568, 18724, 0, 5568, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577766524445666_456_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577766524445666_456_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2fcd3df2 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577766524445666_456_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,127 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: 
UInt32 + Stride: 4 + Data: [3472, 4, 0, 3488, 4, 0, 3504, 4, 0, 3472, 4, 0, 3488, 4, 0, 3504, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577766824957866_457_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577766824957866_457_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..db0c8ef3 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577766824957866_457_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,139 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 5))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) 
{ + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + 
} + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3712, 1, 0, 3728, 1, 0, 3744, 1, 0, 5184, 17476, 0, 5184, 17476, 0, 5184, 17476, 0, 5184, 17476, 0, 5632, 34952, 0, 5632, 34952, 0, 5632, 34952, 0, 5632, 34952, 0, 3712, 1, 0, 3728, 1, 0, 3744, 1, 0, 5184, 17476, 0, 5184, 17476, 0, 5184, 17476, 0, 5184, 17476, 0, 5632, 34952, 0, 5632, 34952, 0, 5632, 34952, 0, 5632, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577767097105562_458_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577767097105562_458_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..65a98c1e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577767097105562_458_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,131 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 0)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((26 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1680, 1, 0, 1684, 1, 0, 1696, 1, 0, 1700, 1, 0, 1712, 1, 0, 1716, 1, 0, 4432, 1, 0, 4436, 1, 0, 4448, 1, 0, 4452, 1, 0, 4464, 1, 0, 4468, 1, 0, 1680, 1, 0, 1684, 1, 0, 1696, 1, 0, 
1700, 1, 0, 1712, 1, 0, 1716, 1, 0, 4432, 1, 0, 4436, 1, 0, 4448, 1, 0, 4452, 1, 0, 4464, 1, 0, 4468, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577767315098005_459_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577767315098005_459_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..75c021c5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577767315098005_459_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,105 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 576, 17, 0, 576, 17, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1472, 30583, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577767489413933_460_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577767489413933_460_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..222ec63f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577767489413933_460_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,362 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || 
(WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((125 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((144 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || 
(WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (282 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (292 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (301 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((340 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((355 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((365 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((374 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((378 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 204 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3392, 1040, 0, 3392, 1040, 0, 3408, 1040, 0, 3408, 1040, 0, 3424, 1040, 0, 3424, 1040, 0, 4544, 2, 0, 4560, 2, 0, 4576, 2, 0, 6096, 2052, 0, 6096, 2052, 0, 6112, 2052, 0, 6112, 2052, 0, 8020, 4, 0, 8024, 4, 0, 8028, 4, 0, 8036, 4, 0, 8040, 4, 0, 8044, 4, 0, 9236, 16384, 0, 9240, 16384, 0, 9244, 16384, 0, 9252, 16384, 0, 9256, 16384, 0, 9260, 16384, 0, 10192, 2048, 0, 10208, 2048, 0, 16128, 4096, 0, 20608, 17476, 0, 20608, 17476, 0, 20608, 17476, 0, 20608, 17476, 0, 21776, 32768, 0, 21792, 
32768, 0, 3392, 1040, 0, 3392, 1040, 0, 3408, 1040, 0, 3408, 1040, 0, 3424, 1040, 0, 3424, 1040, 0, 4544, 2, 0, 4560, 2, 0, 4576, 2, 0, 6096, 2052, 0, 6096, 2052, 0, 6112, 2052, 0, 6112, 2052, 0, 8020, 4, 0, 8024, 4, 0, 8028, 4, 0, 8036, 4, 0, 8040, 4, 0, 8044, 4, 0, 9236, 16384, 0, 9240, 16384, 0, 9244, 16384, 0, 9252, 16384, 0, 9256, 16384, 0, 9260, 16384, 0, 10192, 2048, 0, 10208, 2048, 0, 16128, 4096, 0, 20608, 17476, 0, 20608, 17476, 0, 20608, 17476, 0, 20608, 17476, 0, 21776, 32768, 0, 21792, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577792548391566_461_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577792548391566_461_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e027feaa --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577792548391566_461_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,325 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11))) { + if 
(((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { 
+ uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((255 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((264 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((273 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 954 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4352, 34952, 0, 4352, 34952, 0, 4352, 34952, 0, 4352, 34952, 0, 8640, 273, 0, 8640, 273, 0, 8640, 273, 0, 8656, 273, 0, 8656, 273, 0, 8656, 273, 0, 8672, 273, 0, 8672, 273, 0, 8672, 273, 0, 9808, 40962, 0, 9808, 40962, 0, 9808, 40962, 0, 9824, 40962, 0, 9824, 40962, 0, 
9824, 40962, 0, 10960, 40960, 0, 10960, 40960, 0, 10976, 40960, 0, 10976, 40960, 0, 11664, 32778, 0, 11664, 32778, 0, 11664, 32778, 0, 11680, 32778, 0, 11680, 32778, 0, 11680, 32778, 0, 12368, 32, 0, 12384, 32, 0, 14160, 43018, 0, 14160, 43018, 0, 14160, 43018, 0, 14160, 43018, 0, 14160, 43018, 0, 14176, 43018, 0, 14176, 43018, 0, 14176, 43018, 0, 14176, 43018, 0, 14176, 43018, 0, 14784, 85, 0, 14784, 85, 0, 14784, 85, 0, 14784, 85, 0, 16324, 43690, 0, 16324, 43690, 0, 16324, 43690, 0, 16324, 43690, 0, 16324, 43690, 0, 16324, 43690, 0, 16324, 43690, 0, 16324, 43690, 0, 16328, 43690, 0, 16328, 43690, 0, 16328, 43690, 0, 16328, 43690, 0, 16328, 43690, 0, 16328, 43690, 0, 16328, 43690, 0, 16328, 43690, 0, 16332, 43690, 0, 16332, 43690, 0, 16332, 43690, 0, 16332, 43690, 0, 16332, 43690, 0, 16332, 43690, 0, 16332, 43690, 0, 16332, 43690, 0, 16340, 43690, 0, 16340, 43690, 0, 16340, 43690, 0, 16340, 43690, 0, 16340, 43690, 0, 16340, 43690, 0, 16340, 43690, 0, 16340, 43690, 0, 16344, 43690, 0, 16344, 43690, 0, 16344, 43690, 0, 16344, 43690, 0, 16344, 43690, 0, 16344, 43690, 0, 16344, 43690, 0, 16344, 43690, 0, 16348, 43690, 0, 16348, 43690, 0, 16348, 43690, 0, 16348, 43690, 0, 16348, 43690, 0, 16348, 43690, 0, 16348, 43690, 0, 16348, 43690, 0, 16900, 43690, 0, 16900, 43690, 0, 16900, 43690, 0, 16900, 43690, 0, 16900, 43690, 0, 16900, 43690, 0, 16900, 43690, 0, 16900, 43690, 0, 16904, 43690, 0, 16904, 43690, 0, 16904, 43690, 0, 16904, 43690, 0, 16904, 43690, 0, 16904, 43690, 0, 16904, 43690, 0, 16904, 43690, 0, 16908, 43690, 0, 16908, 43690, 0, 16908, 43690, 0, 16908, 43690, 0, 16908, 43690, 0, 16908, 43690, 0, 16908, 43690, 0, 16908, 43690, 0, 16916, 43690, 0, 16916, 43690, 0, 16916, 43690, 0, 16916, 43690, 0, 16916, 43690, 0, 16916, 43690, 0, 16916, 43690, 0, 16916, 43690, 0, 16920, 43690, 0, 16920, 43690, 0, 16920, 43690, 0, 16920, 43690, 0, 16920, 43690, 0, 16920, 43690, 0, 16920, 43690, 0, 16920, 43690, 0, 16924, 43690, 0, 16924, 43690, 0, 16924, 43690, 0, 16924, 
43690, 0, 16924, 43690, 0, 16924, 43690, 0, 16924, 43690, 0, 16924, 43690, 0, 17472, 43690, 0, 17472, 43690, 0, 17472, 43690, 0, 17472, 43690, 0, 17472, 43690, 0, 17472, 43690, 0, 17472, 43690, 0, 17472, 43690, 0, 17488, 43690, 0, 17488, 43690, 0, 17488, 43690, 0, 17488, 43690, 0, 17488, 43690, 0, 17488, 43690, 0, 17488, 43690, 0, 17488, 43690, 0, 576, 17, 0, 576, 17, 0, 4352, 34952, 0, 4352, 34952, 0, 4352, 34952, 0, 4352, 34952, 0, 8640, 273, 0, 8640, 273, 0, 8640, 273, 0, 8656, 273, 0, 8656, 273, 0, 8656, 273, 0, 8672, 273, 0, 8672, 273, 0, 8672, 273, 0, 9808, 40962, 0, 9808, 40962, 0, 9808, 40962, 0, 9824, 40962, 0, 9824, 40962, 0, 9824, 40962, 0, 10960, 40960, 0, 10960, 40960, 0, 10976, 40960, 0, 10976, 40960, 0, 11664, 32778, 0, 11664, 32778, 0, 11664, 32778, 0, 11680, 32778, 0, 11680, 32778, 0, 11680, 32778, 0, 12368, 32, 0, 12384, 32, 0, 14160, 43018, 0, 14160, 43018, 0, 14160, 43018, 0, 14160, 43018, 0, 14160, 43018, 0, 14176, 43018, 0, 14176, 43018, 0, 14176, 43018, 0, 14176, 43018, 0, 14176, 43018, 0, 14784, 85, 0, 14784, 85, 0, 14784, 85, 0, 14784, 85, 0, 16324, 43690, 0, 16324, 43690, 0, 16324, 43690, 0, 16324, 43690, 0, 16324, 43690, 0, 16324, 43690, 0, 16324, 43690, 0, 16324, 43690, 0, 16328, 43690, 0, 16328, 43690, 0, 16328, 43690, 0, 16328, 43690, 0, 16328, 43690, 0, 16328, 43690, 0, 16328, 43690, 0, 16328, 43690, 0, 16332, 43690, 0, 16332, 43690, 0, 16332, 43690, 0, 16332, 43690, 0, 16332, 43690, 0, 16332, 43690, 0, 16332, 43690, 0, 16332, 43690, 0, 16340, 43690, 0, 16340, 43690, 0, 16340, 43690, 0, 16340, 43690, 0, 16340, 43690, 0, 16340, 43690, 0, 16340, 43690, 0, 16340, 43690, 0, 16344, 43690, 0, 16344, 43690, 0, 16344, 43690, 0, 16344, 43690, 0, 16344, 43690, 0, 16344, 43690, 0, 16344, 43690, 0, 16344, 43690, 0, 16348, 43690, 0, 16348, 43690, 0, 16348, 43690, 0, 16348, 43690, 0, 16348, 43690, 0, 16348, 43690, 0, 16348, 43690, 0, 16348, 43690, 0, 16900, 43690, 0, 16900, 43690, 0, 16900, 43690, 0, 16900, 43690, 0, 16900, 43690, 0, 16900, 43690, 
0, 16900, 43690, 0, 16900, 43690, 0, 16904, 43690, 0, 16904, 43690, 0, 16904, 43690, 0, 16904, 43690, 0, 16904, 43690, 0, 16904, 43690, 0, 16904, 43690, 0, 16904, 43690, 0, 16908, 43690, 0, 16908, 43690, 0, 16908, 43690, 0, 16908, 43690, 0, 16908, 43690, 0, 16908, 43690, 0, 16908, 43690, 0, 16908, 43690, 0, 16916, 43690, 0, 16916, 43690, 0, 16916, 43690, 0, 16916, 43690, 0, 16916, 43690, 0, 16916, 43690, 0, 16916, 43690, 0, 16916, 43690, 0, 16920, 43690, 0, 16920, 43690, 0, 16920, 43690, 0, 16920, 43690, 0, 16920, 43690, 0, 16920, 43690, 0, 16920, 43690, 0, 16920, 43690, 0, 16924, 43690, 0, 16924, 43690, 0, 16924, 43690, 0, 16924, 43690, 0, 16924, 43690, 0, 16924, 43690, 0, 16924, 43690, 0, 16924, 43690, 0, 17472, 43690, 0, 17472, 43690, 0, 17472, 43690, 0, 17472, 43690, 0, 17472, 43690, 0, 17472, 43690, 0, 17472, 43690, 0, 17472, 43690, 0, 17488, 43690, 0, 17488, 43690, 0, 17488, 43690, 0, 17488, 43690, 0, 17488, 43690, 0, 17488, 43690, 0, 17488, 43690, 0, 17488, 43690, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577831294396618_463_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577831294396618_463_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..17278a1b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577831294396618_463_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577852896543166_468_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577852896543166_468_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f057a294 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577852896543166_468_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577932082391085_470_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577932082391085_470_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5ffc80ec --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577932082391085_470_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,177 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 4))) { + if 
((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) 
|| (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((140 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((155 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2432, 33288, 0, 2432, 33288, 0, 2432, 33288, 0, 2448, 33288, 0, 2448, 33288, 0, 2448, 33288, 0, 5312, 64, 0, 5328, 64, 0, 5888, 4161, 0, 5888, 4161, 0, 5888, 4161, 0, 5904, 4161, 0, 5904, 4161, 0, 5904, 4161, 0, 6464, 1040, 0, 6464, 1040, 0, 7504, 2080, 0, 7504, 2080, 0, 7520, 2080, 0, 7520, 2080, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2432, 33288, 0, 2432, 33288, 0, 2432, 33288, 0, 2448, 33288, 0, 2448, 33288, 0, 2448, 33288, 0, 5312, 64, 0, 5328, 64, 0, 5888, 4161, 0, 5888, 4161, 0, 5888, 4161, 0, 5904, 4161, 0, 5904, 4161, 0, 5904, 4161, 0, 6464, 1040, 0, 6464, 1040, 0, 7504, 2080, 0, 7504, 2080, 0, 7520, 2080, 0, 7520, 2080, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756577946494812046_472_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756577946494812046_472_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..34cc7504 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756577946494812046_472_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,167 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: 
Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 4544, 18724, 0, 4544, 18724, 0, 4544, 18724, 0, 4544, 18724, 0, 4544, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 4544, 18724, 0, 4544, 18724, 0, 4544, 18724, 0, 4544, 18724, 0, 4544, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578075190518668_474_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578075190518668_474_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c63033dc --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578075190518668_474_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,308 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() >= 13)) { + result = 
(result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + 
WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) 
| (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((297 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((306 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 378 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1728, 1024, 0, 1344, 34948, 0, 1344, 34948, 0, 1344, 34948, 0, 1344, 34948, 0, 4672, 1024, 0, 6480, 1, 0, 6496, 1, 0, 7184, 1, 0, 7200, 
1, 0, 11648, 32768, 0, 12480, 32768, 0, 13440, 32769, 0, 13440, 32769, 0, 13888, 4096, 0, 15040, 63489, 0, 15040, 63489, 0, 15040, 63489, 0, 15040, 63489, 0, 15040, 63489, 0, 15040, 63489, 0, 16192, 57351, 0, 16192, 57351, 0, 16192, 57351, 0, 16192, 57351, 0, 16192, 57351, 0, 16192, 57351, 0, 17408, 4096, 0, 17424, 4096, 0, 17440, 4096, 0, 18304, 43018, 0, 18304, 43018, 0, 18304, 43018, 0, 18304, 43018, 0, 18304, 43018, 0, 18320, 43018, 0, 18320, 43018, 0, 18320, 43018, 0, 18320, 43018, 0, 18320, 43018, 0, 18336, 43018, 0, 18336, 43018, 0, 18336, 43018, 0, 18336, 43018, 0, 18336, 43018, 0, 19008, 20485, 0, 19008, 20485, 0, 19008, 20485, 0, 19008, 20485, 0, 19024, 20485, 0, 19024, 20485, 0, 19024, 20485, 0, 19024, 20485, 0, 19040, 20485, 0, 19040, 20485, 0, 19040, 20485, 0, 19040, 20485, 0, 20416, 63489, 0, 20416, 63489, 0, 20416, 63489, 0, 20416, 63489, 0, 20416, 63489, 0, 20416, 63489, 0, 1728, 1024, 0, 1344, 34948, 0, 1344, 34948, 0, 1344, 34948, 0, 1344, 34948, 0, 4672, 1024, 0, 6480, 1, 0, 6496, 1, 0, 7184, 1, 0, 7200, 1, 0, 11648, 32768, 0, 12480, 32768, 0, 13440, 32769, 0, 13440, 32769, 0, 13888, 4096, 0, 15040, 63489, 0, 15040, 63489, 0, 15040, 63489, 0, 15040, 63489, 0, 15040, 63489, 0, 15040, 63489, 0, 16192, 57351, 0, 16192, 57351, 0, 16192, 57351, 0, 16192, 57351, 0, 16192, 57351, 0, 16192, 57351, 0, 17408, 4096, 0, 17424, 4096, 0, 17440, 4096, 0, 18304, 43018, 0, 18304, 43018, 0, 18304, 43018, 0, 18304, 43018, 0, 18304, 43018, 0, 18320, 43018, 0, 18320, 43018, 0, 18320, 43018, 0, 18320, 43018, 0, 18320, 43018, 0, 18336, 43018, 0, 18336, 43018, 0, 18336, 43018, 0, 18336, 43018, 0, 18336, 43018, 0, 19008, 20485, 0, 19008, 20485, 0, 19008, 20485, 0, 19008, 20485, 0, 19024, 20485, 0, 19024, 20485, 0, 19024, 20485, 0, 19024, 20485, 0, 19040, 20485, 0, 19040, 20485, 0, 19040, 20485, 0, 19040, 20485, 0, 20416, 63489, 0, 20416, 63489, 0, 20416, 63489, 0, 20416, 63489, 0, 20416, 63489, 0, 20416, 63489, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + 
Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578078246348754_476_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578078246348754_476_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..846064ca --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578078246348754_476_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,172 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 2240, 1, 0, 2256, 1, 0, 2272, 1, 0, 3392, 32769, 0, 3392, 32769, 0, 4864, 32769, 0, 4864, 32769, 0, 5312, 30, 0, 5312, 30, 0, 5312, 30, 0, 5312, 30, 0, 5952, 84, 0, 5952, 84, 0, 5952, 84, 0, 6976, 30, 0, 6976, 30, 0, 6976, 30, 0, 6976, 30, 0, 1280, 1, 0, 2240, 1, 0, 2256, 1, 0, 2272, 1, 0, 3392, 32769, 0, 3392, 32769, 0, 4864, 32769, 0, 4864, 32769, 0, 5312, 30, 0, 5312, 30, 0, 5312, 30, 0, 5312, 30, 0, 5952, 84, 0, 5952, 84, 0, 5952, 84, 0, 6976, 30, 0, 6976, 30, 0, 6976, 30, 0, 6976, 30, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578103853990146_479_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578103853990146_479_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87cd25f4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578103853990146_479_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 
85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578103950126739_480_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578103950126739_480_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ab801df1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578103950126739_480_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,213 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 13)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((34 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((101 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((111 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((120 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((125 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((134 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 504 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1232, 16384, 0, 1248, 16384, 0, 2192, 16384, 0, 2196, 16384, 0, 2200, 16384, 0, 2208, 16384, 0, 2212, 16384, 0, 2216, 16384, 0, 6480, 43690, 0, 6480, 43690, 0, 6480, 43690, 0, 6480, 43690, 0, 6480, 43690, 0, 6480, 43690, 0, 6480, 43690, 0, 6480, 43690, 0, 6484, 43690, 0, 6484, 43690, 0, 6484, 43690, 0, 6484, 43690, 0, 6484, 43690, 0, 6484, 43690, 0, 6484, 43690, 0, 6484, 43690, 0, 6496, 43690, 0, 6496, 43690, 0, 6496, 43690, 0, 6496, 43690, 0, 6496, 43690, 0, 6496, 43690, 0, 6496, 43690, 0, 6496, 43690, 0, 6500, 43690, 0, 6500, 43690, 0, 6500, 43690, 0, 6500, 43690, 0, 6500, 43690, 0, 6500, 43690, 0, 6500, 43690, 0, 6500, 43690, 0, 7120, 8, 0, 7124, 8, 0, 7136, 8, 0, 7140, 8, 0, 8016, 2080, 0, 8016, 2080, 0, 8020, 2080, 0, 8020, 2080, 0, 8032, 2080, 0, 8032, 2080, 0, 8036, 2080, 0, 8036, 2080, 0, 8592, 43690, 0, 8592, 43690, 0, 8592, 43690, 0, 8592, 43690, 0, 8592, 43690, 0, 8592, 43690, 0, 8592, 43690, 0, 8592, 43690, 0, 8596, 43690, 0, 8596, 43690, 0, 8596, 43690, 0, 8596, 43690, 0, 8596, 43690, 0, 8596, 43690, 0, 8596, 43690, 0, 8596, 43690, 0, 8608, 43690, 0, 8608, 43690, 0, 8608, 43690, 0, 8608, 43690, 0, 8608, 43690, 0, 8608, 43690, 0, 8608, 43690, 0, 8608, 43690, 0, 8612, 43690, 0, 8612, 43690, 0, 8612, 43690, 0, 8612, 43690, 0, 8612, 43690, 0, 8612, 43690, 0, 8612, 43690, 0, 8612, 43690, 0, 1232, 16384, 0, 
1248, 16384, 0, 2192, 16384, 0, 2196, 16384, 0, 2200, 16384, 0, 2208, 16384, 0, 2212, 16384, 0, 2216, 16384, 0, 6480, 43690, 0, 6480, 43690, 0, 6480, 43690, 0, 6480, 43690, 0, 6480, 43690, 0, 6480, 43690, 0, 6480, 43690, 0, 6480, 43690, 0, 6484, 43690, 0, 6484, 43690, 0, 6484, 43690, 0, 6484, 43690, 0, 6484, 43690, 0, 6484, 43690, 0, 6484, 43690, 0, 6484, 43690, 0, 6496, 43690, 0, 6496, 43690, 0, 6496, 43690, 0, 6496, 43690, 0, 6496, 43690, 0, 6496, 43690, 0, 6496, 43690, 0, 6496, 43690, 0, 6500, 43690, 0, 6500, 43690, 0, 6500, 43690, 0, 6500, 43690, 0, 6500, 43690, 0, 6500, 43690, 0, 6500, 43690, 0, 6500, 43690, 0, 7120, 8, 0, 7124, 8, 0, 7136, 8, 0, 7140, 8, 0, 8016, 2080, 0, 8016, 2080, 0, 8020, 2080, 0, 8020, 2080, 0, 8032, 2080, 0, 8032, 2080, 0, 8036, 2080, 0, 8036, 2080, 0, 8592, 43690, 0, 8592, 43690, 0, 8592, 43690, 0, 8592, 43690, 0, 8592, 43690, 0, 8592, 43690, 0, 8592, 43690, 0, 8592, 43690, 0, 8596, 43690, 0, 8596, 43690, 0, 8596, 43690, 0, 8596, 43690, 0, 8596, 43690, 0, 8596, 43690, 0, 8596, 43690, 0, 8596, 43690, 0, 8608, 43690, 0, 8608, 43690, 0, 8608, 43690, 0, 8608, 43690, 0, 8608, 43690, 0, 8608, 43690, 0, 8608, 43690, 0, 8608, 43690, 0, 8612, 43690, 0, 8612, 43690, 0, 8612, 43690, 0, 8612, 43690, 0, 8612, 43690, 0, 8612, 43690, 0, 8612, 43690, 0, 8612, 43690, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578114819271194_482_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578114819271194_482_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87cd25f4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578114819271194_482_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578118345202280_484_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578118345202280_484_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f057a294 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578118345202280_484_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578118472364553_485_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578118472364553_485_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cecbeabb --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578118472364553_485_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,338 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 14)) { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((139 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 5))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 
18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2368, 17, 0, 2368, 17, 0, 2944, 4369, 0, 2944, 4369, 0, 2944, 4369, 0, 2944, 4369, 0, 3904, 65, 0, 3904, 65, 0, 4864, 16644, 0, 4864, 16644, 0, 4864, 16644, 0, 5696, 21845, 0, 5696, 21845, 0, 5696, 21845, 0, 5696, 21845, 0, 5696, 21845, 0, 5696, 21845, 0, 5696, 21845, 0, 5696, 21845, 0, 13120, 8322, 0, 13120, 8322, 0, 13120, 8322, 0, 16256, 1, 0, 16832, 16, 0, 18240, 4096, 0, 19200, 256, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2368, 17, 0, 2368, 17, 0, 2944, 4369, 0, 2944, 4369, 0, 2944, 4369, 0, 2944, 4369, 0, 3904, 65, 0, 3904, 65, 0, 4864, 16644, 0, 4864, 16644, 0, 4864, 16644, 0, 5696, 21845, 0, 5696, 21845, 0, 5696, 21845, 0, 5696, 21845, 0, 5696, 21845, 0, 5696, 21845, 0, 5696, 21845, 0, 5696, 21845, 0, 13120, 8322, 0, 13120, 8322, 0, 13120, 8322, 0, 16256, 1, 0, 16832, 16, 0, 18240, 4096, 0, 19200, 256, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578123411618901_487_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578123411618901_487_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cfd8a975 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578123411618901_487_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,183 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((153 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 13)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((165 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 1, 0, 3152, 1, 0, 3168, 1, 0, 5264, 1, 0, 5280, 1, 0, 6464, 1, 0, 7040, 4369, 0, 7040, 4369, 0, 7040, 4369, 0, 7040, 4369, 0, 7360, 30583, 0, 7360, 30583, 0, 7360, 30583, 0, 7360, 30583, 0, 7360, 30583, 0, 7360, 30583, 0, 7360, 30583, 0, 7360, 30583, 0, 7360, 30583, 0, 7360, 30583, 0, 7360, 30583, 0, 7360, 30583, 0, 9792, 8, 0, 9796, 8, 0, 9808, 8, 0, 9812, 8, 0, 1792, 1, 0, 3152, 1, 0, 3168, 1, 0, 5264, 1, 0, 5280, 1, 0, 6464, 1, 0, 7040, 4369, 0, 7040, 4369, 0, 7040, 4369, 0, 7040, 4369, 0, 7360, 30583, 0, 7360, 30583, 0, 7360, 30583, 0, 7360, 
30583, 0, 7360, 30583, 0, 7360, 30583, 0, 7360, 30583, 0, 7360, 30583, 0, 7360, 30583, 0, 7360, 30583, 0, 7360, 30583, 0, 7360, 30583, 0, 9792, 8, 0, 9796, 8, 0, 9808, 8, 0, 9812, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578162742861379_490_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578162742861379_490_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d734ab8d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578162742861379_490_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,415 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 4)) { + 
case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((34 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 5))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((217 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((232 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((291 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (307 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 4))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (348 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (365 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (372 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((390 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (400 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (404 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (413 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2180, 17, 0, 2180, 17, 0, 2184, 17, 0, 2184, 17, 0, 2188, 17, 0, 2188, 17, 0, 2196, 17, 0, 2196, 17, 0, 2200, 17, 0, 2200, 17, 0, 2204, 17, 0, 2204, 17, 0, 2212, 17, 0, 2212, 17, 0, 2216, 17, 0, 2216, 17, 0, 2220, 17, 0, 2220, 17, 0, 5632, 4, 0, 12944, 16384, 0, 12960, 16384, 0, 12976, 16384, 0, 15744, 34952, 0, 15744, 34952, 0, 15744, 34952, 0, 15744, 34952, 0, 16640, 73, 0, 16640, 73, 0, 16640, 73, 0, 26432, 2080, 0, 26432, 2080, 
0, 2180, 17, 0, 2180, 17, 0, 2184, 17, 0, 2184, 17, 0, 2188, 17, 0, 2188, 17, 0, 2196, 17, 0, 2196, 17, 0, 2200, 17, 0, 2200, 17, 0, 2204, 17, 0, 2204, 17, 0, 2212, 17, 0, 2212, 17, 0, 2216, 17, 0, 2216, 17, 0, 2220, 17, 0, 2220, 17, 0, 5632, 4, 0, 12944, 16384, 0, 12960, 16384, 0, 12976, 16384, 0, 15744, 34952, 0, 15744, 34952, 0, 15744, 34952, 0, 15744, 34952, 0, 16640, 73, 0, 16640, 73, 0, 16640, 73, 0, 26432, 2080, 0, 26432, 2080, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578193168406640_491_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578193168406640_491_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..71515c46 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578193168406640_491_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,201 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 73, 0, 1792, 73, 0, 1792, 73, 0, 2368, 1040, 0, 2368, 1040, 0, 2688, 18724, 0, 2688, 18724, 0, 2688, 18724, 0, 2688, 18724, 0, 2688, 18724, 0, 4048, 16384, 0, 4064, 16384, 0, 6672, 21505, 0, 6672, 21505, 0, 6672, 21505, 0, 6672, 21505, 0, 6688, 21505, 0, 6688, 21505, 0, 6688, 21505, 0, 6688, 21505, 0, 7120, 16384, 0, 7136, 16384, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 73, 0, 1792, 73, 0, 1792, 73, 0, 2368, 1040, 0, 2368, 1040, 0, 2688, 18724, 0, 2688, 18724, 0, 2688, 18724, 0, 2688, 18724, 0, 2688, 18724, 0, 4048, 16384, 0, 4064, 16384, 0, 6672, 21505, 0, 6672, 21505, 0, 6672, 21505, 0, 6672, 21505, 0, 6688, 21505, 0, 6688, 21505, 0, 6688, 21505, 0, 6688, 21505, 0, 7120, 16384, 0, 7136, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578193939902696_492_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578193939902696_492_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2835666b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578193939902696_492_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,614 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 11)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 3))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if 
(true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + 
result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((331 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((350 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((366 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((375 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (380 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (384 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (399 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (436 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (447 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (470 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 4)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (483 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (504 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (523 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((538 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result 
+ WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((545 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (553 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((569 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((587 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter4 == 2)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (597 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (601 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (608 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 282 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [17152, 128, 0, 18432, 65, 0, 18432, 65, 0, 19008, 1040, 0, 19008, 1040, 0, 19328, 16644, 0, 19328, 16644, 0, 19328, 16644, 0, 22416, 512, 0, 22432, 512, 0, 22448, 512, 0, 23440, 8194, 0, 23440, 8194, 0, 23456, 8194, 0, 23456, 8194, 0, 23472, 8194, 0, 23472, 8194, 0, 24016, 8194, 0, 24016, 8194, 0, 24032, 8194, 0, 24032, 8194, 0, 24048, 8194, 0, 24048, 8194, 0, 24320, 2080, 0, 24320, 2080, 0, 25536, 4609, 0, 25536, 4609, 0, 25536, 4609, 0, 27904, 32768, 0, 28608, 32768, 0, 30080, 9250, 0, 30080, 9250, 0, 30080, 9250, 0, 30080, 9250, 0, 30912, 1, 0, 34880, 2, 0, 34896, 2, 0, 34912, 2, 0, 36432, 4, 0, 36448, 4, 0, 37584, 4, 0, 37600, 4, 0, 38208, 8, 0, 38912, 15, 0, 38912, 15, 0, 38912, 15, 0, 38912, 15, 0, 17152, 128, 0, 18432, 65, 0, 18432, 65, 0, 19008, 1040, 0, 19008, 1040, 0, 19328, 16644, 0, 19328, 16644, 0, 19328, 16644, 0, 22416, 512, 0, 22432, 512, 0, 22448, 512, 0, 23440, 8194, 0, 23440, 8194, 0, 23456, 8194, 0, 23456, 8194, 0, 23472, 8194, 0, 23472, 8194, 0, 24016, 8194, 0, 24016, 8194, 0, 24032, 8194, 0, 24032, 8194, 0, 24048, 8194, 0, 24048, 8194, 0, 24320, 2080, 0, 24320, 2080, 0, 25536, 4609, 0, 25536, 4609, 0, 25536, 4609, 0, 27904, 32768, 
0, 28608, 32768, 0, 30080, 9250, 0, 30080, 9250, 0, 30080, 9250, 0, 30080, 9250, 0, 30912, 1, 0, 34880, 2, 0, 34896, 2, 0, 34912, 2, 0, 36432, 4, 0, 36448, 4, 0, 37584, 4, 0, 37600, 4, 0, 38208, 8, 0, 38912, 15, 0, 38912, 15, 0, 38912, 15, 0, 38912, 15, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578197181013138_493_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578197181013138_493_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f8c48dda --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578197181013138_493_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,393 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((137 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + if ((counter1 == 2)) { + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 5)) { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result 
= (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((170 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i5 = 0; (i5 < 
3); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((260 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (294 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + 
result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((312 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + for (uint i8 = 0; (i8 < 2); i8 = (i8 + 1)) { + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((337 << 6) | (counter7 << 4)) | (i8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((344 << 6) | (counter7 << 4)) | (i8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((353 << 6) | (counter7 << 4)) | (i8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (358 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 
threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 408 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 6144, 73, 0, 6144, 73, 0, 6144, 73, 0, 7824, 1026, 0, 7824, 1026, 0, 7840, 1026, 0, 7840, 1026, 0, 8784, 8192, 0, 8788, 8192, 0, 8800, 8192, 0, 8804, 8192, 0, 576, 17, 0, 576, 17, 0, 10884, 32, 0, 10888, 32, 0, 10892, 32, 0, 10900, 32, 0, 10904, 32, 0, 10908, 32, 0, 13824, 16384, 0, 14784, 2048, 0, 16640, 64, 0, 16656, 64, 0, 16672, 64, 0, 17280, 1, 0, 17296, 1, 0, 17312, 1, 0, 21584, 146, 0, 21584, 146, 0, 21584, 146, 0, 21588, 146, 0, 21588, 146, 0, 21588, 146, 0, 21600, 146, 0, 21600, 146, 0, 21600, 146, 0, 21604, 146, 0, 21604, 146, 0, 21604, 146, 0, 21616, 146, 0, 21616, 146, 0, 21616, 146, 0, 21620, 146, 0, 21620, 146, 0, 21620, 146, 0, 22608, 16, 0, 22612, 16, 0, 22624, 16, 0, 22628, 16, 0, 22640, 16, 0, 22644, 16, 0, 22912, 18724, 0, 22912, 18724, 0, 22912, 18724, 0, 22912, 18724, 0, 22912, 18724, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 1984, 30583, 0, 6144, 73, 0, 6144, 73, 0, 6144, 73, 0, 7824, 1026, 0, 7824, 1026, 0, 7840, 1026, 0, 7840, 1026, 0, 8784, 8192, 0, 8788, 8192, 0, 8800, 8192, 0, 8804, 8192, 0, 576, 17, 0, 576, 17, 0, 10884, 32, 0, 10888, 32, 0, 10892, 32, 0, 10900, 32, 0, 10904, 32, 0, 10908, 32, 0, 13824, 16384, 0, 14784, 2048, 0, 16640, 64, 0, 16656, 64, 0, 16672, 64, 0, 17280, 1, 0, 17296, 1, 0, 17312, 1, 0, 21584, 146, 0, 21584, 146, 0, 21584, 146, 0, 21588, 146, 0, 21588, 146, 0, 21588, 146, 0, 21600, 146, 0, 21600, 146, 0, 21600, 146, 0, 21604, 146, 0, 21604, 146, 0, 21604, 146, 0, 21616, 146, 0, 21616, 146, 0, 21616, 146, 0, 21620, 146, 0, 21620, 
146, 0, 21620, 146, 0, 22608, 16, 0, 22612, 16, 0, 22624, 16, 0, 22628, 16, 0, 22640, 16, 0, 22644, 16, 0, 22912, 18724, 0, 22912, 18724, 0, 22912, 18724, 0, 22912, 18724, 0, 22912, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578235881964969_495_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578235881964969_495_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..43b2b944 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578235881964969_495_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 2176, 16644, 0, 2176, 16644, 0, 2176, 16644, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 2176, 16644, 0, 2176, 16644, 0, 2176, 16644, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578236004715251_496_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578236004715251_496_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5937fde0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578236004715251_496_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,125 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((26 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((51 << 6) | (i0 << 4)) | (i1 << 2)) | i2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((58 << 6) | (i0 << 4)) | (i1 << 2)) | i2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((68 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((87 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 288 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1664, 2080, 0, 1664, 2080, 0, 1668, 2080, 0, 1668, 2080, 0, 1672, 2080, 0, 1672, 2080, 0, 1680, 2080, 0, 1680, 2080, 0, 1684, 2080, 0, 1684, 2080, 0, 1688, 2080, 0, 1688, 2080, 0, 3264, 2, 0, 3265, 2, 0, 3268, 2, 0, 3269, 2, 0, 3272, 2, 0, 3273, 2, 0, 3280, 2, 0, 3281, 2, 0, 3284, 2, 0, 3285, 2, 0, 3288, 2, 0, 3289, 2, 0, 5568, 18456, 0, 5568, 18456, 0, 5568, 18456, 0, 5568, 18456, 0, 5572, 18456, 0, 5572, 18456, 0, 5572, 18456, 0, 5572, 18456, 0, 5576, 18456, 0, 5576, 18456, 0, 5576, 18456, 0, 5576, 18456, 0, 
5584, 18456, 0, 5584, 18456, 0, 5584, 18456, 0, 5584, 18456, 0, 5588, 18456, 0, 5588, 18456, 0, 5588, 18456, 0, 5588, 18456, 0, 5592, 18456, 0, 5592, 18456, 0, 5592, 18456, 0, 5592, 18456, 0, 1664, 2080, 0, 1664, 2080, 0, 1668, 2080, 0, 1668, 2080, 0, 1672, 2080, 0, 1672, 2080, 0, 1680, 2080, 0, 1680, 2080, 0, 1684, 2080, 0, 1684, 2080, 0, 1688, 2080, 0, 1688, 2080, 0, 3264, 2, 0, 3265, 2, 0, 3268, 2, 0, 3269, 2, 0, 3272, 2, 0, 3273, 2, 0, 3280, 2, 0, 3281, 2, 0, 3284, 2, 0, 3285, 2, 0, 3288, 2, 0, 3289, 2, 0, 5568, 18456, 0, 5568, 18456, 0, 5568, 18456, 0, 5568, 18456, 0, 5572, 18456, 0, 5572, 18456, 0, 5572, 18456, 0, 5572, 18456, 0, 5576, 18456, 0, 5576, 18456, 0, 5576, 18456, 0, 5576, 18456, 0, 5584, 18456, 0, 5584, 18456, 0, 5584, 18456, 0, 5584, 18456, 0, 5588, 18456, 0, 5588, 18456, 0, 5588, 18456, 0, 5588, 18456, 0, 5592, 18456, 0, 5592, 18456, 0, 5592, 18456, 0, 5592, 18456, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578245093492087_498_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578245093492087_498_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e67cd2f7 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578245093492087_498_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,239 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 12)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((105 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + 
WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 174 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3392, 85, 0, 3392, 85, 0, 3392, 85, 0, 3392, 85, 0, 4352, 43008, 0, 4352, 43008, 0, 4352, 43008, 0, 4368, 43008, 0, 4368, 43008, 0, 4368, 43008, 0, 4384, 43008, 0, 4384, 43008, 0, 4384, 43008, 0, 7872, 8192, 0, 7888, 8192, 0, 7904, 8192, 0, 8320, 2, 0, 8336, 2, 0, 8352, 2, 0, 9152, 17, 0, 9152, 17, 0, 10048, 17476, 0, 10048, 17476, 0, 10048, 17476, 0, 10048, 17476, 0, 10496, 34952, 0, 10496, 34952, 0, 10496, 34952, 0, 10496, 34952, 0, 3392, 85, 0, 3392, 85, 0, 3392, 85, 0, 3392, 85, 0, 4352, 43008, 0, 4352, 43008, 0, 4352, 43008, 0, 4368, 43008, 0, 4368, 43008, 0, 4368, 43008, 0, 4384, 43008, 0, 4384, 43008, 0, 4384, 43008, 0, 7872, 8192, 0, 7888, 8192, 0, 7904, 8192, 0, 8320, 2, 0, 8336, 2, 0, 8352, 2, 0, 9152, 17, 0, 9152, 17, 0, 10048, 17476, 0, 10048, 17476, 0, 10048, 17476, 0, 10048, 17476, 0, 10496, 34952, 0, 10496, 34952, 0, 10496, 34952, 0, 10496, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578248329369012_499_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578248329369012_499_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2b44511c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578248329369012_499_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,263 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for 
(uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((217 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 204 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 3088, 16930, 0, 3088, 16930, 0, 3088, 16930, 0, 3088, 16930, 0, 3104, 16930, 0, 3104, 16930, 0, 3104, 16930, 0, 3104, 16930, 0, 4048, 32, 0, 4052, 32, 0, 4064, 32, 0, 4068, 32, 0, 5440, 34952, 0, 5440, 34952, 0, 5440, 34952, 0, 5440, 34952, 0, 9360, 128, 0, 9376, 
128, 0, 11392, 17448, 0, 11392, 17448, 0, 11392, 17448, 0, 11392, 17448, 0, 12032, 17, 0, 12032, 17, 0, 12928, 17476, 0, 12928, 17476, 0, 12928, 17476, 0, 12928, 17476, 0, 576, 17, 0, 576, 17, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 3088, 16930, 0, 3088, 16930, 0, 3088, 16930, 0, 3088, 16930, 0, 3104, 16930, 0, 3104, 16930, 0, 3104, 16930, 0, 3104, 16930, 0, 4048, 32, 0, 4052, 32, 0, 4064, 32, 0, 4068, 32, 0, 5440, 34952, 0, 5440, 34952, 0, 5440, 34952, 0, 5440, 34952, 0, 9360, 128, 0, 9376, 128, 0, 11392, 17448, 0, 11392, 17448, 0, 11392, 17448, 0, 11392, 17448, 0, 12032, 17, 0, 12032, 17, 0, 12928, 17476, 0, 12928, 17476, 0, 12928, 17476, 0, 12928, 17476, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578251201664918_500_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578251201664918_500_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9a485ebd --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578251201664918_500_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,114 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578251286458792_501_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578251286458792_501_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4e8b254f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578251286458792_501_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,125 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + if (((WaveGetLaneIndex() == 5) || 
(WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1216, 73, 0, 1216, 73, 0, 1216, 73, 0, 1792, 1040, 0, 1792, 1040, 0, 3520, 18724, 0, 3520, 18724, 0, 3520, 18724, 0, 3520, 18724, 0, 3520, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1216, 73, 0, 1216, 73, 0, 1216, 73, 0, 1792, 1040, 0, 1792, 1040, 0, 3520, 18724, 0, 3520, 18724, 0, 3520, 18724, 0, 3520, 18724, 0, 3520, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578251460871537_502_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578251460871537_502_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..79c0aa5b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578251460871537_502_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,292 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 
11))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 5, 0, 768, 5, 0, 1344, 5, 0, 1344, 5, 0, 5440, 65, 0, 5440, 65, 0, 9216, 4096, 0, 14016, 32776, 0, 14016, 32776, 0, 14976, 8, 0, 15552, 5201, 0, 15552, 5201, 0, 15552, 5201, 0, 15552, 5201, 0, 15552, 5201, 0, 15872, 18724, 0, 15872, 18724, 0, 15872, 18724, 0, 15872, 18724, 0, 15872, 18724, 0, 768, 5, 0, 768, 5, 0, 1344, 5, 0, 1344, 5, 0, 5440, 65, 0, 5440, 65, 0, 9216, 4096, 0, 14016, 32776, 0, 14016, 32776, 0, 14976, 8, 0, 15552, 5201, 0, 15552, 5201, 0, 15552, 5201, 0, 15552, 5201, 0, 15552, 5201, 0, 15872, 18724, 0, 15872, 18724, 0, 15872, 18724, 0, 15872, 18724, 0, 15872, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578273950638899_504_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578273950638899_504_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..19b0d154 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578273950638899_504_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,154 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 7))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter1 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3200, 128, 0, 3216, 128, 0, 3232, 128, 0, 4800, 47, 0, 4800, 47, 0, 4800, 47, 0, 4800, 47, 0, 4800, 47, 0, 5712, 1, 0, 5728, 1, 0, 8784, 16384, 0, 8800, 16384, 0, 9664, 32768, 0, 3200, 128, 0, 3216, 128, 0, 3232, 128, 0, 4800, 47, 0, 4800, 47, 0, 4800, 47, 0, 4800, 47, 0, 4800, 47, 0, 5712, 1, 0, 5728, 1, 0, 8784, 16384, 0, 8800, 16384, 0, 9664, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578344152762920_507_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578344152762920_507_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..eb484978 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578344152762920_507_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,224 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } 
+ uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter2 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single 
dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5504, 1040, 0, 5504, 1040, 0, 5824, 18724, 0, 5824, 18724, 0, 5824, 18724, 0, 5824, 18724, 0, 5824, 18724, 0, 7120, 21845, 0, 7120, 21845, 0, 7120, 21845, 0, 7120, 21845, 0, 7120, 21845, 0, 7120, 21845, 0, 7120, 21845, 0, 7120, 21845, 0, 7136, 21845, 0, 7136, 21845, 0, 7136, 21845, 0, 7136, 21845, 0, 7136, 21845, 0, 7136, 21845, 0, 7136, 21845, 0, 7136, 21845, 0, 5504, 1040, 0, 5504, 1040, 0, 5824, 18724, 0, 5824, 18724, 0, 5824, 18724, 0, 5824, 18724, 0, 5824, 18724, 0, 7120, 21845, 0, 7120, 21845, 0, 7120, 21845, 0, 7120, 21845, 0, 7120, 21845, 0, 7120, 21845, 0, 7120, 21845, 0, 7120, 21845, 0, 7136, 21845, 0, 7136, 21845, 0, 7136, 21845, 0, 7136, 21845, 0, 7136, 21845, 0, 7136, 21845, 0, 7136, 21845, 0, 7136, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578350722341583_509_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578350722341583_509_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..67868e70 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578350722341583_509_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9))) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2496, 8, 0, 2512, 8, 0, 3456, 8, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2496, 8, 0, 2512, 8, 0, 3456, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578350846234722_510_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578350846234722_510_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..26240416 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578350846234722_510_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,249 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 1)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = 
(result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) 
{ + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3776, 21844, 0, 3776, 21844, 0, 3776, 21844, 0, 3776, 21844, 0, 3776, 21844, 0, 3776, 21844, 0, 3776, 21844, 0, 7488, 64, 0, 7936, 2048, 0, 9152, 8192, 0, 9728, 21780, 0, 9728, 21780, 0, 9728, 21780, 0, 9728, 21780, 0, 9728, 21780, 0, 9728, 21780, 0, 9728, 64, 0, 10368, 17, 0, 10368, 17, 0, 10944, 4369, 0, 10944, 4369, 0, 10944, 4369, 0, 10944, 4369, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11712, 34952, 0, 11712, 34952, 0, 11712, 34952, 0, 11712, 34952, 0, 3776, 21844, 0, 3776, 21844, 0, 3776, 21844, 0, 3776, 21844, 0, 3776, 21844, 0, 3776, 21844, 0, 3776, 21844, 0, 7488, 64, 0, 7936, 2048, 0, 9152, 8192, 0, 9728, 
21780, 0, 9728, 21780, 0, 9728, 21780, 0, 9728, 21780, 0, 9728, 21780, 0, 9728, 21780, 0, 9728, 64, 0, 10368, 17, 0, 10368, 17, 0, 10944, 4369, 0, 10944, 4369, 0, 10944, 4369, 0, 10944, 4369, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11264, 30583, 0, 11712, 34952, 0, 11712, 34952, 0, 11712, 34952, 0, 11712, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578351655375072_511_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578351655375072_511_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bfc930f1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578351655375072_511_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,218 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = 
(result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((101 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((112 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((142 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((165 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 432 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3728, 16681, 0, 3728, 16681, 0, 3728, 16681, 0, 3728, 16681, 0, 3728, 16681, 0, 3744, 16681, 0, 3744, 16681, 0, 3744, 16681, 0, 3744, 16681, 0, 3744, 16681, 0, 5200, 8, 0, 5204, 8, 0, 5216, 8, 0, 5220, 8, 0, 6480, 32769, 0, 6480, 32769, 0, 6484, 32769, 0, 6484, 32769, 0, 6496, 32769, 0, 6496, 32769, 0, 6500, 32769, 0, 6500, 32769, 0, 7184, 32769, 0, 7184, 32769, 0, 7188, 32769, 0, 7188, 32769, 0, 7200, 32769, 0, 7200, 32769, 0, 7204, 32769, 0, 7204, 32769, 0, 7888, 16392, 0, 7888, 16392, 0, 7892, 16392, 0, 7892, 16392, 0, 7904, 16392, 0, 7904, 16392, 0, 7908, 16392, 0, 7908, 16392, 0, 9104, 16704, 0, 9104, 16704, 0, 9104, 16704, 0, 9108, 16704, 0, 9108, 16704, 0, 9108, 16704, 0, 9120, 16704, 0, 9120, 16704, 0, 9120, 16704, 0, 9124, 16704, 0, 9124, 16704, 0, 9124, 16704, 0, 10576, 12312, 0, 10576, 12312, 0, 10576, 12312, 0, 10576, 12312, 0, 10580, 12312, 0, 10580, 12312, 0, 10580, 12312, 0, 10580, 12312, 0, 10592, 12312, 0, 10592, 12312, 0, 10592, 12312, 0, 10592, 12312, 0, 10596, 12312, 0, 10596, 12312, 0, 10596, 12312, 0, 10596, 12312, 0, 11536, 12304, 0, 11536, 12304, 0, 11536, 12304, 0, 11552, 12304, 0, 11552, 12304, 0, 11552, 12304, 0, 3728, 16681, 0, 3728, 16681, 0, 3728, 16681, 0, 3728, 16681, 0, 3728, 16681, 0, 3744, 16681, 0, 3744, 16681, 0, 3744, 16681, 0, 3744, 16681, 0, 3744, 16681, 0, 5200, 8, 0, 5204, 8, 0, 5216, 8, 0, 5220, 8, 0, 6480, 32769, 0, 6480, 32769, 0, 6484, 32769, 0, 6484, 32769, 0, 6496, 32769, 0, 6496, 32769, 0, 6500, 32769, 0, 6500, 32769, 0, 7184, 32769, 0, 7184, 32769, 0, 7188, 32769, 0, 7188, 32769, 0, 7200, 32769, 0, 7200, 32769, 0, 7204, 32769, 0, 7204, 32769, 0, 7888, 16392, 0, 7888, 16392, 0, 7892, 16392, 0, 7892, 16392, 0, 7904, 16392, 0, 7904, 16392, 0, 
7908, 16392, 0, 7908, 16392, 0, 9104, 16704, 0, 9104, 16704, 0, 9104, 16704, 0, 9108, 16704, 0, 9108, 16704, 0, 9108, 16704, 0, 9120, 16704, 0, 9120, 16704, 0, 9120, 16704, 0, 9124, 16704, 0, 9124, 16704, 0, 9124, 16704, 0, 10576, 12312, 0, 10576, 12312, 0, 10576, 12312, 0, 10576, 12312, 0, 10580, 12312, 0, 10580, 12312, 0, 10580, 12312, 0, 10580, 12312, 0, 10592, 12312, 0, 10592, 12312, 0, 10592, 12312, 0, 10592, 12312, 0, 10596, 12312, 0, 10596, 12312, 0, 10596, 12312, 0, 10596, 12312, 0, 11536, 12304, 0, 11536, 12304, 0, 11536, 12304, 0, 11552, 12304, 0, 11552, 12304, 0, 11552, 12304, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578373674387837_512_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578373674387837_512_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c818edc4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578373674387837_512_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,225 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11))) { + if ((WaveGetLaneIndex() >= 10)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((129 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((136 << 6) | (i2 << 4)) | (counter3 << 
2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [8260, 256, 0, 8264, 256, 0, 8276, 256, 0, 8280, 256, 0, 8292, 256, 0, 8296, 256, 0, 8708, 16, 0, 8712, 16, 0, 8724, 16, 0, 8728, 16, 0, 8740, 16, 0, 8744, 16, 0, 9408, 272, 0, 9408, 272, 0, 9424, 272, 0, 9424, 272, 0, 9440, 272, 0, 9440, 272, 0, 9856, 256, 0, 9872, 256, 0, 9888, 256, 0, 12864, 4, 0, 12608, 43690, 0, 12608, 43690, 0, 12608, 43690, 0, 12608, 43690, 0, 12608, 43690, 0, 12608, 43690, 0, 12608, 43690, 0, 12608, 43690, 0, 11968, 21841, 0, 11968, 21841, 0, 11968, 21841, 0, 11968, 21841, 0, 11968, 21841, 0, 11968, 21841, 0, 11968, 21841, 0, 8260, 256, 0, 8264, 256, 0, 8276, 256, 0, 8280, 256, 0, 8292, 256, 0, 8296, 256, 0, 8708, 16, 0, 8712, 16, 0, 8724, 16, 0, 8728, 16, 0, 8740, 16, 0, 8744, 16, 0, 9408, 272, 0, 9408, 272, 0, 9424, 272, 0, 9424, 272, 0, 9440, 272, 0, 9440, 272, 0, 9856, 256, 0, 9872, 256, 0, 9888, 256, 0, 12864, 4, 0, 12608, 43690, 0, 12608, 43690, 0, 12608, 43690, 0, 12608, 43690, 0, 12608, 43690, 0, 12608, 43690, 0, 12608, 43690, 0, 12608, 43690, 0, 11968, 21841, 0, 11968, 21841, 0, 11968, 21841, 0, 11968, 21841, 0, 11968, 21841, 0, 11968, 21841, 0, 11968, 21841, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578380431615147_513_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578380431615147_513_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6ff6a68e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578380431615147_513_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,140 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } 
+ } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 4352, 0, 1088, 4352, 0, 1104, 4352, 0, 1104, 4352, 0, 5632, 1, 0, 5648, 1, 0, 6528, 17476, 0, 6528, 17476, 0, 6528, 17476, 0, 6528, 17476, 0, 6976, 34952, 0, 6976, 34952, 0, 6976, 34952, 0, 6976, 34952, 0, 1088, 4352, 0, 1088, 4352, 0, 1104, 4352, 0, 1104, 4352, 0, 5632, 1, 0, 5648, 1, 0, 6528, 17476, 0, 6528, 17476, 0, 6528, 17476, 0, 6528, 17476, 0, 6976, 34952, 0, 6976, 34952, 0, 6976, 34952, 0, 6976, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578380638448969_514_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578380638448969_514_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e94c46e1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578380638448969_514_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,157 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((69 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + continue; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 11))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (counter2 << 4)); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2512, 10272, 0, 2512, 10272, 0, 2512, 10272, 0, 2528, 10272, 0, 2528, 10272, 0, 2528, 10272, 0, 5840, 8352, 0, 5840, 8352, 0, 5840, 8352, 0, 5856, 8352, 0, 5856, 8352, 0, 5856, 8352, 0, 8768, 1092, 0, 8768, 1092, 0, 8768, 1092, 0, 10832, 1024, 0, 10848, 1024, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2512, 10272, 0, 2512, 10272, 0, 2512, 10272, 0, 2528, 10272, 0, 2528, 10272, 0, 2528, 10272, 0, 5840, 8352, 0, 5840, 8352, 0, 5840, 8352, 0, 5856, 8352, 0, 5856, 8352, 0, 5856, 8352, 0, 8768, 1092, 0, 8768, 1092, 0, 8768, 1092, 0, 10832, 1024, 0, 10848, 1024, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578382156594983_515_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578382156594983_515_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3180f8f7 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578382156594983_515_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,386 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((143 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((174 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((288 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((303 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((310 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((317 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (338 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((354 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 1)) { + break; + } + } + } + break; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (364 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3776, 57359, 0, 3776, 57359, 0, 3776, 57359, 0, 3776, 57359, 0, 3776, 57359, 0, 3776, 57359, 0, 3776, 57359, 0, 3392, 2064, 0, 3392, 2064, 0, 3008, 4128, 0, 3008, 4128, 0, 5184, 32769, 0, 5184, 32769, 0, 5200, 32769, 0, 5200, 32769, 0, 6336, 32769, 0, 6336, 32769, 0, 6352, 32769, 0, 6352, 32769, 0, 9152, 2, 0, 9168, 2, 0, 9184, 2, 0, 11456, 18724, 0, 11456, 18724, 0, 11456, 18724, 0, 11456, 18724, 0, 11456, 18724, 0, 12096, 15, 0, 
12096, 15, 0, 12096, 15, 0, 12096, 15, 0, 12928, 1, 0, 16576, 64, 0, 18432, 8192, 0, 18448, 8192, 0, 18464, 8192, 0, 23296, 16, 0, 3776, 57359, 0, 3776, 57359, 0, 3776, 57359, 0, 3776, 57359, 0, 3776, 57359, 0, 3776, 57359, 0, 3776, 57359, 0, 3392, 2064, 0, 3392, 2064, 0, 3008, 4128, 0, 3008, 4128, 0, 5184, 32769, 0, 5184, 32769, 0, 5200, 32769, 0, 5200, 32769, 0, 6336, 32769, 0, 6336, 32769, 0, 6352, 32769, 0, 6352, 32769, 0, 9152, 2, 0, 9168, 2, 0, 9184, 2, 0, 11456, 18724, 0, 11456, 18724, 0, 11456, 18724, 0, 11456, 18724, 0, 11456, 18724, 0, 12096, 15, 0, 12096, 15, 0, 12096, 15, 0, 12096, 15, 0, 12928, 1, 0, 16576, 64, 0, 18432, 8192, 0, 18448, 8192, 0, 18464, 8192, 0, 23296, 16, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578403980946650_517_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578403980946650_517_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4822e36e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578403980946650_517_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,99 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml 
+--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578404188674866_518_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578404188674866_518_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..997c311b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578404188674866_518_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,214 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 
13))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((127 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((136 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (i2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((226 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((236 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((259 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + if ((i2 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 
21845, 0, 3904, 8194, 0, 3904, 8194, 0, 3264, 8, 0, 10560, 17538, 0, 10560, 17538, 0, 10560, 17538, 0, 10560, 17538, 0, 12544, 16961, 0, 12544, 16961, 0, 12544, 16961, 0, 12544, 16961, 0, 12560, 16961, 0, 12560, 16961, 0, 12560, 16961, 0, 12560, 16961, 0, 14468, 4374, 0, 14468, 4374, 0, 14468, 4374, 0, 14468, 4374, 0, 14468, 4374, 0, 14484, 4374, 0, 14484, 4374, 0, 14484, 4374, 0, 14484, 4374, 0, 14484, 4374, 0, 16580, 33826, 0, 16580, 33826, 0, 16580, 33826, 0, 16580, 33826, 0, 16596, 33826, 0, 16596, 33826, 0, 16596, 33826, 0, 16596, 33826, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 4160, 21845, 0, 3904, 8194, 0, 3904, 8194, 0, 3264, 8, 0, 10560, 17538, 0, 10560, 17538, 0, 10560, 17538, 0, 10560, 17538, 0, 12544, 16961, 0, 12544, 16961, 0, 12544, 16961, 0, 12544, 16961, 0, 12560, 16961, 0, 12560, 16961, 0, 12560, 16961, 0, 12560, 16961, 0, 14468, 4374, 0, 14468, 4374, 0, 14468, 4374, 0, 14468, 4374, 0, 14468, 4374, 0, 14484, 4374, 0, 14484, 4374, 0, 14484, 4374, 0, 14484, 4374, 0, 14484, 4374, 0, 16580, 33826, 0, 16580, 33826, 0, 16580, 33826, 0, 16580, 33826, 0, 16596, 33826, 0, 16596, 33826, 0, 16596, 33826, 0, 16596, 33826, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578406768851152_519_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578406768851152_519_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ab2f299f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578406768851152_519_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,424 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 3)) { + uint counter0 = 0; + while ((counter0 < 2)) { + 
counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (282 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (292 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() 
== 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (333 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (356 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (371 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (389 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + 
} + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (394 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [960, 73, 0, 960, 73, 0, 960, 73, 0, 16832, 1024, 0, 17152, 18724, 0, 17152, 18724, 0, 17152, 18724, 0, 17152, 18724, 0, 17152, 18724, 0, 18048, 73, 0, 18048, 73, 0, 18048, 73, 0, 20352, 16, 0, 23744, 1024, 0, 25216, 18724, 0, 25216, 18724, 0, 25216, 18724, 0, 25216, 18724, 0, 25216, 18724, 0, 960, 73, 0, 960, 73, 0, 960, 73, 0, 16832, 1024, 0, 17152, 18724, 0, 17152, 18724, 0, 17152, 18724, 0, 17152, 18724, 0, 17152, 18724, 0, 18048, 73, 0, 18048, 73, 0, 18048, 73, 0, 20352, 16, 0, 23744, 1024, 0, 25216, 18724, 0, 25216, 18724, 0, 25216, 18724, 0, 25216, 18724, 0, 25216, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578421841149771_523_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578421841149771_523_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6ae912f1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578421841149771_523_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,280 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | 
(counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 13)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 
7) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5696, 73, 0, 5696, 73, 0, 5696, 73, 0, 6272, 1040, 0, 6272, 1040, 0, 6912, 16384, 0, 11392, 85, 0, 11392, 85, 0, 11392, 85, 0, 11392, 85, 0, 12736, 32768, 0, 5696, 73, 0, 5696, 73, 0, 5696, 73, 0, 6272, 1040, 0, 6272, 1040, 0, 6912, 16384, 0, 11392, 85, 0, 11392, 85, 0, 11392, 85, 0, 11392, 85, 0, 12736, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578423438329294_524_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578423438329294_524_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d586c537 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578423438329294_524_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,209 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578423660604065_525_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578423660604065_525_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87cd25f4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578423660604065_525_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578426829276409_527_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578426829276409_527_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..db26218d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578426829276409_527_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,416 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 4))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((97 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 5))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((166 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((244 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((292 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((311 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((320 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (332 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (341 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((359 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((369 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((380 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (391 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (405 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (414 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = 
(result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (419 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (426 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (430 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 606 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3524, 4096, 0, 3528, 4096, 0, 3532, 4096, 0, 3540, 4096, 0, 3544, 4096, 0, 3548, 4096, 0, 4740, 320, 0, 4740, 320, 0, 4744, 320, 0, 4744, 320, 0, 4748, 320, 0, 4748, 320, 0, 4756, 320, 0, 4756, 320, 0, 4760, 320, 0, 4760, 320, 0, 4764, 320, 0, 4764, 320, 0, 6212, 16385, 0, 6212, 16385, 0, 6216, 16385, 0, 6216, 16385, 0, 6220, 16385, 0, 6220, 16385, 0, 6228, 16385, 0, 6228, 16385, 0, 6232, 16385, 0, 6232, 16385, 0, 6236, 16385, 0, 6236, 16385, 0, 6656, 16384, 0, 6672, 16384, 0, 10644, 32, 0, 10648, 32, 0, 10660, 32, 0, 10664, 32, 0, 10676, 32, 0, 10680, 32, 0, 12112, 32, 0, 12128, 32, 0, 12144, 32, 0, 12688, 43690, 0, 12688, 43690, 0, 12688, 43690, 0, 12688, 43690, 0, 12688, 43690, 0, 12688, 43690, 0, 12688, 43690, 0, 12688, 43690, 0, 12704, 43690, 0, 12704, 43690, 0, 12704, 43690, 0, 12704, 43690, 
0, 12704, 43690, 0, 12704, 43690, 0, 12704, 43690, 0, 12704, 43690, 0, 12720, 43690, 0, 12720, 43690, 0, 12720, 43690, 0, 12720, 43690, 0, 12720, 43690, 0, 12720, 43690, 0, 12720, 43690, 0, 12720, 43690, 0, 13312, 73, 0, 13312, 73, 0, 13312, 73, 0, 17536, 2080, 0, 17536, 2080, 0, 20496, 2080, 0, 20496, 2080, 0, 20512, 2080, 0, 20512, 2080, 0, 21248, 2080, 0, 21248, 2080, 0, 21824, 16644, 0, 21824, 16644, 0, 21824, 16644, 0, 22992, 16388, 0, 22992, 16388, 0, 23008, 16388, 0, 23008, 16388, 0, 23024, 16388, 0, 23024, 16388, 0, 24336, 16388, 0, 24336, 16388, 0, 24352, 16388, 0, 24352, 16388, 0, 24368, 16388, 0, 24368, 16388, 0, 25920, 17, 0, 25920, 17, 0, 26816, 17476, 0, 26816, 17476, 0, 26816, 17476, 0, 26816, 17476, 0, 27264, 34952, 0, 27264, 34952, 0, 27264, 34952, 0, 27264, 34952, 0, 3524, 4096, 0, 3528, 4096, 0, 3532, 4096, 0, 3540, 4096, 0, 3544, 4096, 0, 3548, 4096, 0, 4740, 320, 0, 4740, 320, 0, 4744, 320, 0, 4744, 320, 0, 4748, 320, 0, 4748, 320, 0, 4756, 320, 0, 4756, 320, 0, 4760, 320, 0, 4760, 320, 0, 4764, 320, 0, 4764, 320, 0, 6212, 16385, 0, 6212, 16385, 0, 6216, 16385, 0, 6216, 16385, 0, 6220, 16385, 0, 6220, 16385, 0, 6228, 16385, 0, 6228, 16385, 0, 6232, 16385, 0, 6232, 16385, 0, 6236, 16385, 0, 6236, 16385, 0, 6656, 16384, 0, 6672, 16384, 0, 10644, 32, 0, 10648, 32, 0, 10660, 32, 0, 10664, 32, 0, 10676, 32, 0, 10680, 32, 0, 12112, 32, 0, 12128, 32, 0, 12144, 32, 0, 12688, 43690, 0, 12688, 43690, 0, 12688, 43690, 0, 12688, 43690, 0, 12688, 43690, 0, 12688, 43690, 0, 12688, 43690, 0, 12688, 43690, 0, 12704, 43690, 0, 12704, 43690, 0, 12704, 43690, 0, 12704, 43690, 0, 12704, 43690, 0, 12704, 43690, 0, 12704, 43690, 0, 12704, 43690, 0, 12720, 43690, 0, 12720, 43690, 0, 12720, 43690, 0, 12720, 43690, 0, 12720, 43690, 0, 12720, 43690, 0, 12720, 43690, 0, 12720, 43690, 0, 13312, 73, 0, 13312, 73, 0, 13312, 73, 0, 17536, 2080, 0, 17536, 2080, 0, 20496, 2080, 0, 20496, 2080, 0, 20512, 2080, 0, 20512, 2080, 0, 21248, 2080, 0, 21248, 2080, 0, 21824, 16644, 0, 
21824, 16644, 0, 21824, 16644, 0, 22992, 16388, 0, 22992, 16388, 0, 23008, 16388, 0, 23008, 16388, 0, 23024, 16388, 0, 23024, 16388, 0, 24336, 16388, 0, 24336, 16388, 0, 24352, 16388, 0, 24352, 16388, 0, 24368, 16388, 0, 24368, 16388, 0, 25920, 17, 0, 25920, 17, 0, 26816, 17476, 0, 26816, 17476, 0, 26816, 17476, 0, 26816, 17476, 0, 27264, 34952, 0, 27264, 34952, 0, 27264, 34952, 0, 27264, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578446299654351_528_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578446299654351_528_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ad8f32cc --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578446299654351_528_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3008, 512, 0, 2624, 1074, 0, 2624, 1074, 0, 2624, 1074, 0, 2624, 1074, 0, 2368, 4096, 0, 2112, 77, 0, 2112, 77, 0, 2112, 77, 0, 2112, 77, 0, 3008, 512, 0, 2624, 1074, 0, 2624, 1074, 0, 2624, 1074, 0, 2624, 1074, 0, 2368, 4096, 0, 2112, 77, 0, 2112, 77, 0, 2112, 77, 0, 2112, 77, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578446441004167_529_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578446441004167_529_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..88545fb5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578446441004167_529_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578455960583818_532_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578455960583818_532_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2f3e9842 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578455960583818_532_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,286 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 9)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((226 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((249 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 6016, 34952, 0, 6016, 34952, 0, 6016, 34952, 0, 6016, 34952, 0, 8832, 17, 0, 8832, 17, 0, 14464, 1024, 0, 14468, 1024, 0, 14480, 1024, 0, 14484, 1024, 0, 16576, 34952, 0, 16576, 34952, 0, 16576, 34952, 0, 16576, 34952, 0, 576, 17, 0, 576, 17, 0, 6016, 34952, 0, 6016, 34952, 0, 6016, 34952, 0, 6016, 34952, 0, 8832, 17, 0, 8832, 17, 0, 14464, 1024, 0, 14468, 1024, 0, 14480, 1024, 0, 14484, 1024, 0, 16576, 34952, 0, 16576, 34952, 0, 16576, 34952, 0, 16576, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578456976119943_533_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578456976119943_533_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..88545fb5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578456976119943_533_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 
60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578457092791494_534_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578457092791494_534_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b6fee6ef --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578457092791494_534_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,203 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 8))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 8640, 8, 0, 9856, 2048, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 8640, 8, 0, 9856, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578457391863481_535_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578457391863481_535_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f14d7cf1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578457391863481_535_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,200 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } + } + } + case 2: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: 
main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 240 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2768, 32, 0, 2772, 32, 0, 2784, 32, 0, 2788, 32, 0, 3728, 4353, 0, 3728, 4353, 0, 3728, 4353, 0, 3732, 4353, 0, 3732, 4353, 0, 3732, 4353, 0, 3744, 4353, 0, 3744, 4353, 0, 3744, 4353, 0, 3748, 4353, 0, 3748, 4353, 0, 3748, 4353, 0, 9664, 1024, 0, 10624, 36, 0, 10624, 36, 0, 14080, 548, 0, 14080, 548, 0, 14080, 548, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 576, 17, 0, 576, 17, 0, 2768, 32, 0, 2772, 32, 0, 2784, 32, 0, 2788, 32, 0, 3728, 4353, 0, 3728, 4353, 0, 3728, 4353, 0, 3732, 4353, 0, 3732, 4353, 0, 3732, 4353, 0, 3744, 4353, 0, 3744, 4353, 0, 3744, 4353, 0, 3748, 4353, 0, 3748, 4353, 0, 3748, 4353, 0, 9664, 1024, 0, 10624, 36, 0, 10624, 36, 0, 14080, 548, 0, 14080, 548, 0, 14080, 548, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0, 14528, 65535, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578461712356245_536_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578461712356245_536_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d3024c1c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578461712356245_536_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,184 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 13)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + result = 
(result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4864, 8192, 0, 6656, 2, 0, 6672, 2, 0, 6688, 2, 0, 11344, 4, 0, 11360, 4, 0, 11376, 4, 0, 11776, 34952, 0, 11776, 34952, 0, 11776, 34952, 0, 11776, 34952, 0, 576, 17, 0, 576, 17, 0, 4864, 8192, 0, 6656, 2, 0, 6672, 2, 0, 6688, 2, 0, 11344, 4, 0, 11360, 4, 0, 11376, 4, 0, 11776, 34952, 0, 11776, 34952, 0, 11776, 34952, 0, 11776, 
34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578465348521545_538_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578465348521545_538_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..38f72ded --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578465348521545_538_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,198 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # 
Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 63491, 0, 1216, 63491, 0, 1216, 63491, 0, 1216, 63491, 0, 1216, 63491, 0, 1216, 63491, 0, 1216, 63491, 0, 2240, 43010, 0, 2240, 43010, 0, 2240, 43010, 0, 2240, 43010, 0, 5312, 43010, 0, 5312, 43010, 0, 5312, 43010, 0, 5312, 43010, 0, 1216, 63491, 0, 1216, 63491, 0, 1216, 63491, 0, 1216, 63491, 0, 1216, 63491, 0, 1216, 63491, 0, 1216, 63491, 0, 2240, 43010, 0, 2240, 43010, 0, 2240, 43010, 0, 2240, 43010, 0, 5312, 43010, 0, 5312, 43010, 0, 5312, 43010, 0, 5312, 43010, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578465945728090_539_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578465945728090_539_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2b5f78eb --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578465945728090_539_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,212 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { +/* Record protocol: every guarded wave-op site reserves 3 words with InterlockedAdd on _wave_op_index[0], then stores (site id << 6) OR'd with any enclosing loop counters (<< 4, << 2) in word 0 and the x and y components of WaveActiveBallot(1) in words 1-2. */ uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((124 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { +/* Same selector as the enclosing switch, so under outer case 2 only this switch's case 2 arm can run. */ switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4992, 1, 0, 5008, 1, 0, 5024, 1, 0, 5824, 8, 0, 5840, 8, 0, 5856, 8, 0, 6976, 8192, 0, 6992, 8192, 0, 7008, 8192, 0, 7936, 9216, 0, 7936, 9216, 0, 7940, 9216, 0, 7940, 9216, 0, 7952, 9216, 0, 7952, 9216, 0, 7956, 9216, 0, 7956, 9216, 0, 7968, 9216, 0, 7968, 9216, 0, 7972, 9216, 0, 7972, 9216, 0, 8384, 8192, 0, 8400, 8192, 0, 8416, 8192, 0, 12032, 256, 0, 12048, 256, 0, 12064, 256, 0, 13248, 16384, 0, 13264, 16384, 0, 13280, 16384, 0, 4992, 1, 0, 5008, 1, 0, 5024, 1, 0, 5824, 8, 0, 5840, 8, 0, 5856, 8, 0, 6976, 8192, 0, 6992, 8192, 0, 7008, 8192, 0, 7936, 9216, 0, 7936, 9216, 0, 7940, 9216, 0, 7940, 9216, 0, 7952, 9216, 0, 7952, 9216, 0, 7956, 9216, 0, 7956, 9216, 0, 7968, 9216, 0, 7968, 9216, 0, 7972, 9216, 0, 7972, 9216, 0, 8384, 8192, 0, 8400, 8192, 0, 8416, 8192, 0, 12032, 256, 0, 12048, 256, 0, 12064, 256, 0, 13248, 16384, 0, 13264, 16384, 0, 13280, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578471752925443_541_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578471752925443_541_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5b59da9a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578471752925443_541_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,242 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { +/* Record protocol: every guarded wave-op site reserves 3 words with InterlockedAdd on _wave_op_index[0], then stores (site id << 6) OR'd with any enclosing loop counters (<< 4, << 2) in word 0 and the x and y components of WaveActiveBallot(1) in words 1-2. */ uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { +/* Same selector as the enclosing switch, so under outer case 0 only this switch's case 0 arm can run. */ switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((112 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter3 
<< 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { +/* Not reachable: lane % 3 is always 0, 1 or 2 and each value has its own case above. */ result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { +/* The nested test below requires an odd lane inside an even-lane branch, so site 214 can never record. */ if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1408, 33288, 0, 1408, 33288, 0, 1408, 33288, 0, 1424, 33288, 0, 1424, 33288, 0, 1424, 33288, 0, 5376, 1040, 0, 5376, 1040, 0, 6288, 4, 0, 6304, 4, 0, 6320, 4, 0, 7760, 4, 0, 7776, 4, 0, 7792, 4, 0, 9216, 64515, 0, 9216, 64515, 0, 9216, 64515, 0, 9216, 64515, 0, 9216, 64515, 0, 9216, 64515, 0, 9216, 64515, 0, 9216, 64515, 0, 9232, 64515, 0, 9232, 64515, 0, 9232, 64515, 0, 9232, 64515, 0, 9232, 64515, 0, 9232, 64515, 0, 9232, 64515, 0, 9232, 64515, 0, 9248, 64515, 0, 9248, 64515, 0, 9248, 64515, 0, 9248, 64515, 0, 9248, 64515, 0, 9248, 64515, 0, 9248, 64515, 0, 9248, 64515, 0, 10880, 2, 0, 10896, 2, 0, 10912, 2, 0, 1408, 33288, 0, 1408, 33288, 0, 1408, 33288, 0, 1424, 33288, 0, 1424, 33288, 0, 1424, 33288, 0, 5376, 1040, 0, 5376, 1040, 0, 6288, 4, 0, 6304, 4, 0, 6320, 4, 0, 7760, 4, 0, 7776, 4, 0, 7792, 4, 0, 9216, 64515, 0, 9216, 64515, 0, 9216, 64515, 0, 9216, 64515, 0, 9216, 64515, 0, 9216, 64515, 0, 
9216, 64515, 0, 9216, 64515, 0, 9232, 64515, 0, 9232, 64515, 0, 9232, 64515, 0, 9232, 64515, 0, 9232, 64515, 0, 9232, 64515, 0, 9232, 64515, 0, 9232, 64515, 0, 9248, 64515, 0, 9248, 64515, 0, 9248, 64515, 0, 9248, 64515, 0, 9248, 64515, 0, 9248, 64515, 0, 9248, 64515, 0, 9248, 64515, 0, 10880, 2, 0, 10896, 2, 0, 10912, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578480727004756_542_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578480727004756_542_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dfa16073 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578480727004756_542_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,311 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { +/* Record protocol: every guarded wave-op site reserves 3 words with InterlockedAdd on _wave_op_index[0], then stores (site id << 6) OR'd with any enclosing loop counters (<< 4, << 2) in word 0 and the x and y components of WaveActiveBallot(1) in words 1-2. */ uint result = 0; + if ((WaveGetLaneIndex() == 14)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + result = (result 
+ WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (((112 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + } + case 1: { +/* Case 0 above has no switch-level break (its only break exits the for loop), so its lanes continue into this arm. */ if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { +/* Case 1 also ends without a break, so lanes arriving from cases 0 and 1 continue into this arm as well. */ if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 10))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (311 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { +/* Not reachable: lane % 4 is always 0, 1, 2 or 3 and each value has its own case above. */ result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (315 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (345 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (364 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((381 << 6) | 
(i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((401 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((412 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 282 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4864, 16384, 0, 7172, 1, 0, 7176, 1, 0, 7180, 1, 0, 7188, 1, 0, 7192, 1, 0, 7196, 1, 0, 7876, 4097, 0, 7876, 4097, 0, 7880, 4097, 0, 7880, 4097, 0, 7884, 4097, 0, 7884, 4097, 0, 7892, 4097, 0, 7892, 4097, 0, 7896, 4097, 0, 7896, 4097, 0, 7900, 4097, 0, 7900, 4097, 0, 8640, 4369, 0, 8640, 4369, 0, 8640, 4369, 0, 8640, 4369, 0, 19904, 32896, 0, 19904, 32896, 0, 24384, 1301, 0, 24384, 1301, 0, 24384, 1301, 0, 24384, 1301, 0, 24384, 1301, 0, 24400, 1301, 0, 24400, 1301, 0, 24400, 1301, 0, 24400, 1301, 0, 24400, 1301, 0, 25664, 32781, 0, 25664, 32781, 0, 25664, 32781, 0, 25664, 32781, 0, 25680, 32781, 0, 25680, 32781, 0, 25680, 32781, 0, 25680, 32781, 0, 26368, 32769, 0, 26368, 32769, 0, 26384, 32769, 0, 26384, 32769, 0, 4864, 16384, 0, 7172, 1, 0, 7176, 1, 0, 
7180, 1, 0, 7188, 1, 0, 7192, 1, 0, 7196, 1, 0, 7876, 4097, 0, 7876, 4097, 0, 7880, 4097, 0, 7880, 4097, 0, 7884, 4097, 0, 7884, 4097, 0, 7892, 4097, 0, 7892, 4097, 0, 7896, 4097, 0, 7896, 4097, 0, 7900, 4097, 0, 7900, 4097, 0, 8640, 4369, 0, 8640, 4369, 0, 8640, 4369, 0, 8640, 4369, 0, 19904, 32896, 0, 19904, 32896, 0, 24384, 1301, 0, 24384, 1301, 0, 24384, 1301, 0, 24384, 1301, 0, 24384, 1301, 0, 24400, 1301, 0, 24400, 1301, 0, 24400, 1301, 0, 24400, 1301, 0, 24400, 1301, 0, 25664, 32781, 0, 25664, 32781, 0, 25664, 32781, 0, 25664, 32781, 0, 25680, 32781, 0, 25680, 32781, 0, 25680, 32781, 0, 25680, 32781, 0, 26368, 32769, 0, 26368, 32769, 0, 26384, 32769, 0, 26384, 32769, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578606695903681_544_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578606695903681_544_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..28de0583 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578606695903681_544_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,111 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { +/* Record protocol: every guarded wave-op site reserves 3 words with InterlockedAdd on _wave_op_index[0], then stores (site id << 6) OR'd with any enclosing loop counter (<< 4) in word 0 and the x and y components of WaveActiveBallot(1) in words 1-2. */ uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { +/* Of the lanes listed in the filter below only 8 has lane % 3 == 2, and lane 8 matches neither inner filter, so sites 56 and 70 never record. */ if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578606817623799_545_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578606817623799_545_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b78c648b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578606817623799_545_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,235 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((i0 == 2)) { + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 4096, 0, 1104, 4096, 0, 1120, 4096, 0, 2244, 1, 0, 2248, 1, 0, 2252, 1, 0, 2260, 1, 0, 2264, 1, 0, 2268, 1, 0, 2276, 1, 0, 2280, 1, 0, 2284, 1, 0, 2816, 16, 0, 2832, 16, 0, 2848, 16, 0, 7936, 32, 0, 8256, 17476, 0, 8256, 17476, 0, 8256, 17476, 0, 8256, 17476, 0, 8704, 52428, 0, 8704, 52428, 0, 8704, 52428, 0, 8704, 52428, 0, 8704, 52428, 0, 8704, 52428, 0, 8704, 52428, 0, 8704, 52428, 0, 9344, 85, 0, 9344, 85, 0, 9344, 85, 0, 9344, 85, 0, 10768, 2048, 0, 10784, 2048, 0, 10800, 2048, 0, 1088, 4096, 0, 1104, 4096, 0, 1120, 4096, 0, 2244, 1, 0, 2248, 1, 0, 2252, 1, 0, 2260, 1, 0, 2264, 1, 0, 2268, 1, 0, 2276, 1, 0, 2280, 1, 0, 2284, 1, 0, 2816, 16, 0, 2832, 16, 0, 2848, 16, 0, 7936, 32, 0, 8256, 17476, 0, 8256, 17476, 0, 8256, 17476, 0, 8256, 17476, 0, 8704, 52428, 0, 8704, 52428, 0, 8704, 52428, 0, 8704, 52428, 0, 8704, 52428, 0, 8704, 52428, 0, 8704, 52428, 0, 8704, 52428, 0, 9344, 85, 0, 9344, 85, 0, 9344, 85, 0, 9344, 85, 0, 10768, 2048, 0, 10784, 2048, 0, 10800, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578616391759954_546_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578616391759954_546_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fd2a6826 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578616391759954_546_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,177 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } + } else { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((120 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((127 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 168 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 3328, 16384, 0, 3344, 16384, 0, 3360, 16384, 0, 4484, 18432, 0, 4484, 18432, 0, 4500, 18432, 0, 4500, 18432, 0, 4516, 18432, 0, 4516, 18432, 0, 5188, 18436, 0, 5188, 18436, 0, 5188, 18436, 0, 5204, 18436, 0, 5204, 18436, 0, 5204, 18436, 0, 5220, 18436, 0, 5220, 18436, 0, 5220, 18436, 0, 6592, 256, 0, 6608, 256, 0, 8896, 32, 0, 8912, 32, 0, 9344, 32, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 3328, 16384, 0, 3344, 16384, 0, 3360, 16384, 0, 4484, 18432, 0, 4484, 18432, 0, 4500, 18432, 0, 4500, 18432, 0, 4516, 18432, 0, 4516, 18432, 0, 5188, 18436, 0, 5188, 18436, 0, 5188, 18436, 0, 5204, 18436, 0, 5204, 18436, 0, 5204, 18436, 0, 5220, 18436, 0, 5220, 
18436, 0, 5220, 18436, 0, 6592, 256, 0, 6608, 256, 0, 8896, 32, 0, 8912, 32, 0, 9344, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578654452745248_549_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578654452745248_549_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..84d1b71c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578654452745248_549_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,222 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((91 << 6) | (i0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((171 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (counter3 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 2048, 0, 1040, 2048, 0, 2688, 64, 0, 2704, 64, 0, 4036, 8192, 0, 4040, 8192, 0, 4052, 8192, 0, 4056, 8192, 0, 4928, 32770, 0, 4928, 32770, 0, 4944, 32770, 0, 4944, 32770, 0, 5828, 128, 0, 5832, 128, 0, 5836, 128, 0, 5844, 128, 0, 5848, 128, 0, 5852, 128, 0, 6272, 64, 0, 6288, 64, 0, 10064, 20481, 0, 10064, 20481, 0, 10064, 20481, 0, 10080, 20481, 0, 10080, 20481, 0, 10080, 20481, 0, 10096, 20481, 0, 10096, 20481, 0, 10096, 20481, 0, 11984, 16385, 0, 11984, 16385, 0, 12000, 16385, 0, 12000, 16385, 0, 12016, 16385, 0, 12016, 16385, 0, 1024, 2048, 0, 1040, 2048, 0, 2688, 64, 0, 2704, 64, 
0, 4036, 8192, 0, 4040, 8192, 0, 4052, 8192, 0, 4056, 8192, 0, 4928, 32770, 0, 4928, 32770, 0, 4944, 32770, 0, 4944, 32770, 0, 5828, 128, 0, 5832, 128, 0, 5836, 128, 0, 5844, 128, 0, 5848, 128, 0, 5852, 128, 0, 6272, 64, 0, 6288, 64, 0, 10064, 20481, 0, 10064, 20481, 0, 10064, 20481, 0, 10080, 20481, 0, 10080, 20481, 0, 10080, 20481, 0, 10096, 20481, 0, 10096, 20481, 0, 10096, 20481, 0, 11984, 16385, 0, 11984, 16385, 0, 12000, 16385, 0, 12000, 16385, 0, 12016, 16385, 0, 12016, 16385, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578664253285611_551_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578664253285611_551_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7fea08d6 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578664253285611_551_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,322 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || 
(WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((307 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((328 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 204 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 7168, 512, 0, 9024, 17476, 0, 9024, 17476, 0, 9024, 17476, 0, 9024, 17476, 0, 9984, 136, 0, 9984, 136, 0, 10000, 136, 0, 10000, 136, 0, 15424, 32768, 0, 15440, 32768, 0, 16768, 17, 0, 16768, 17, 0, 17344, 4369, 0, 17344, 4369, 0, 17344, 4369, 0, 17344, 4369, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 19664, 8200, 0, 19664, 8200, 0, 21008, 2, 0, 576, 17, 0, 576, 17, 0, 7168, 512, 0, 9024, 17476, 0, 9024, 17476, 0, 9024, 17476, 0, 9024, 17476, 0, 9984, 136, 0, 9984, 136, 0, 10000, 136, 0, 10000, 136, 0, 15424, 32768, 0, 15440, 32768, 0, 16768, 17, 0, 16768, 17, 0, 17344, 4369, 0, 17344, 4369, 0, 17344, 4369, 0, 17344, 4369, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 17664, 30583, 0, 19664, 8200, 0, 19664, 8200, 0, 21008, 2, 0] + - Name: _wave_op_index + Format: UInt32 + 
Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578665989642054_552_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578665989642054_552_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..99c29939 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578665989642054_552_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,142 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((29 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 294 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1876, 8, 0, 1880, 8, 0, 1892, 8, 0, 1896, 8, 0, 1908, 8, 0, 1912, 8, 0, 2772, 33288, 0, 2772, 33288, 0, 2772, 33288, 0, 2776, 33288, 0, 2776, 33288, 0, 2776, 33288, 0, 2788, 33288, 0, 2788, 33288, 0, 2788, 33288, 0, 2792, 33288, 0, 2792, 33288, 0, 2792, 33288, 0, 2804, 33288, 0, 2804, 33288, 0, 2804, 33288, 0, 2808, 33288, 0, 2808, 33288, 0, 2808, 33288, 0, 4500, 4161, 0, 4500, 4161, 0, 4500, 4161, 0, 4504, 4161, 0, 4504, 4161, 0, 4504, 4161, 0, 4516, 4161, 0, 4516, 4161, 0, 4516, 4161, 0, 4520, 4161, 0, 4520, 4161, 0, 4520, 4161, 0, 4532, 4161, 0, 4532, 4161, 0, 4532, 4161, 0, 4536, 4161, 0, 4536, 4161, 0, 4536, 4161, 0, 5056, 1040, 0, 5056, 1040, 0, 5376, 18724, 0, 5376, 18724, 0, 5376, 18724, 0, 5376, 18724, 0, 5376, 18724, 0, 1876, 8, 0, 1880, 8, 0, 1892, 8, 0, 1896, 8, 0, 1908, 8, 0, 1912, 8, 0, 2772, 33288, 0, 2772, 33288, 0, 2772, 33288, 0, 2776, 33288, 0, 2776, 33288, 0, 2776, 33288, 0, 2788, 33288, 0, 2788, 33288, 0, 2788, 33288, 0, 2792, 33288, 0, 2792, 33288, 0, 2792, 33288, 0, 2804, 33288, 0, 2804, 33288, 0, 2804, 33288, 0, 2808, 33288, 0, 2808, 33288, 0, 2808, 33288, 0, 4500, 4161, 0, 4500, 4161, 0, 4500, 4161, 0, 4504, 4161, 0, 4504, 4161, 0, 4504, 4161, 0, 4516, 4161, 0, 4516, 4161, 0, 4516, 4161, 0, 4520, 4161, 0, 4520, 4161, 0, 4520, 4161, 0, 4532, 4161, 0, 4532, 4161, 0, 4532, 4161, 0, 4536, 4161, 0, 4536, 4161, 0, 4536, 4161, 0, 5056, 1040, 0, 5056, 1040, 0, 5376, 18724, 0, 5376, 18724, 0, 5376, 18724, 0, 5376, 18724, 0, 5376, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578668309927934_554_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578668309927934_554_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e958b6f0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578668309927934_554_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,80 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578722305774870_556_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578722305774870_556_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8f890c47 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578722305774870_556_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,209 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() >= 15)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((193 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((208 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [9792, 2, 0, 9808, 2, 0, 10944, 40962, 0, 10944, 40962, 0, 10944, 40962, 0, 10960, 40962, 0, 10960, 40962, 0, 10960, 40962, 0, 12356, 8192, 0, 12360, 8192, 0, 12372, 8192, 0, 12376, 8192, 0, 14016, 32768, 0, 14032, 32768, 0, 15104, 514, 0, 15104, 514, 0, 15120, 514, 0, 15120, 514, 0, 9792, 2, 0, 9808, 2, 0, 10944, 40962, 0, 10944, 40962, 0, 10944, 40962, 0, 10960, 40962, 0, 10960, 40962, 0, 10960, 40962, 0, 12356, 8192, 0, 12360, 8192, 0, 12372, 8192, 0, 12376, 8192, 0, 14016, 32768, 0, 14032, 32768, 0, 15104, 514, 0, 15104, 514, 0, 15120, 514, 0, 15120, 514, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578725963414557_557_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578725963414557_557_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f14f4a91 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578725963414557_557_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + 
WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3200, 256, 0, 2816, 1024, 0, 2560, 4225, 0, 2560, 4225, 0, 2560, 4225, 0, 2048, 59904, 0, 2048, 59904, 0, 2048, 59904, 0, 2048, 59904, 0, 2048, 59904, 0, 3200, 256, 0, 2816, 1024, 0, 2560, 4225, 0, 2560, 4225, 0, 2560, 4225, 0, 2048, 59904, 0, 2048, 59904, 0, 2048, 59904, 0, 2048, 59904, 0, 2048, 59904, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + 
Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578726137025962_558_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578726137025962_558_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..896c4a71 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578726137025962_558_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,187 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 7))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 
6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((89 << 6) | (i0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((96 << 6) | (i0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + } else { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i3 << 4)); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 324 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2240, 2, 0, 6144, 2, 0, 6148, 2, 0, 6152, 2, 0, 6160, 2, 0, 6164, 2, 0, 6168, 2, 0, 6720, 256, 0, 6736, 256, 0, 9344, 64525, 0, 9344, 64525, 0, 9344, 64525, 0, 9344, 64525, 0, 9344, 
64525, 0, 9344, 64525, 0, 9344, 64525, 0, 9344, 64525, 0, 9344, 64525, 0, 9360, 64525, 0, 9360, 64525, 0, 9360, 64525, 0, 9360, 64525, 0, 9360, 64525, 0, 9360, 64525, 0, 9360, 64525, 0, 9360, 64525, 0, 9360, 64525, 0, 11136, 64541, 0, 11136, 64541, 0, 11136, 64541, 0, 11136, 64541, 0, 11136, 64541, 0, 11136, 64541, 0, 11136, 64541, 0, 11136, 64541, 0, 11136, 64541, 0, 11136, 64541, 0, 11152, 64541, 0, 11152, 64541, 0, 11152, 64541, 0, 11152, 64541, 0, 11152, 64541, 0, 11152, 64541, 0, 11152, 64541, 0, 11152, 64541, 0, 11152, 64541, 0, 11152, 64541, 0, 12032, 57373, 0, 12032, 57373, 0, 12032, 57373, 0, 12032, 57373, 0, 12032, 57373, 0, 12032, 57373, 0, 12032, 57373, 0, 2240, 2, 0, 6144, 2, 0, 6148, 2, 0, 6152, 2, 0, 6160, 2, 0, 6164, 2, 0, 6168, 2, 0, 6720, 256, 0, 6736, 256, 0, 9344, 64525, 0, 9344, 64525, 0, 9344, 64525, 0, 9344, 64525, 0, 9344, 64525, 0, 9344, 64525, 0, 9344, 64525, 0, 9344, 64525, 0, 9344, 64525, 0, 9360, 64525, 0, 9360, 64525, 0, 9360, 64525, 0, 9360, 64525, 0, 9360, 64525, 0, 9360, 64525, 0, 9360, 64525, 0, 9360, 64525, 0, 9360, 64525, 0, 11136, 64541, 0, 11136, 64541, 0, 11136, 64541, 0, 11136, 64541, 0, 11136, 64541, 0, 11136, 64541, 0, 11136, 64541, 0, 11136, 64541, 0, 11136, 64541, 0, 11136, 64541, 0, 11152, 64541, 0, 11152, 64541, 0, 11152, 64541, 0, 11152, 64541, 0, 11152, 64541, 0, 11152, 64541, 0, 11152, 64541, 0, 11152, 64541, 0, 11152, 64541, 0, 11152, 64541, 0, 12032, 57373, 0, 12032, 57373, 0, 12032, 57373, 0, 12032, 57373, 0, 12032, 57373, 0, 12032, 57373, 0, 12032, 57373, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + 
Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578727517141885_559_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578727517141885_559_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d5d5d084 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578727517141885_559_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,154 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 
1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 1: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 6))) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 65280, 0, 1600, 65280, 0, 1600, 65280, 0, 1600, 65280, 0, 1600, 65280, 0, 1600, 65280, 0, 
1600, 65280, 0, 1600, 65280, 0, 1344, 40, 0, 1344, 40, 0, 2432, 73, 0, 2432, 73, 0, 2432, 73, 0, 6592, 18724, 0, 6592, 18724, 0, 6592, 18724, 0, 6592, 18724, 0, 6592, 18724, 0, 1600, 65280, 0, 1600, 65280, 0, 1600, 65280, 0, 1600, 65280, 0, 1600, 65280, 0, 1600, 65280, 0, 1600, 65280, 0, 1600, 65280, 0, 1344, 40, 0, 1344, 40, 0, 2432, 73, 0, 2432, 73, 0, 2432, 73, 0, 6592, 18724, 0, 6592, 18724, 0, 6592, 18724, 0, 6592, 18724, 0, 6592, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578727767488288_560_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578727767488288_560_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2a13723a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578727767488288_560_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,204 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || 
(WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(((171 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 16, 0, 2816, 257, 0, 2816, 257, 0, 5056, 17476, 0, 5056, 17476, 0, 5056, 17476, 0, 5056, 17476, 0, 9408, 10, 0, 9408, 10, 0, 9424, 10, 0, 9424, 10, 0, 10948, 8192, 0, 10952, 8192, 0, 10964, 8192, 0, 10968, 8192, 0, 2112, 16, 0, 2816, 257, 0, 2816, 257, 0, 5056, 17476, 0, 5056, 17476, 0, 5056, 17476, 0, 5056, 17476, 0, 9408, 10, 0, 9408, 10, 0, 9424, 10, 0, 9424, 10, 0, 10948, 8192, 0, 10952, 8192, 0, 10964, 8192, 0, 10968, 8192, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578756723236381_562_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578756723236381_562_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3dcfc118 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578756723236381_562_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,283 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || 
(WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 7))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 
11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if 
((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((314 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 6))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((346 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((361 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (((368 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((391 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (400 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (410 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (419 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 300 + - Name: expected_bit_patterns + Format: UInt32 + 
Stride: 4 + Data: [1856, 8192, 0, 7296, 2048, 0, 13056, 128, 0, 14272, 2, 0, 15488, 512, 0, 16832, 32, 0, 19216, 1024, 0, 19232, 1024, 0, 19248, 1024, 0, 20116, 16, 0, 20120, 16, 0, 20124, 16, 0, 20132, 16, 0, 20136, 16, 0, 20140, 16, 0, 20148, 16, 0, 20152, 16, 0, 20156, 16, 0, 23124, 256, 0, 23128, 256, 0, 23132, 256, 0, 23140, 256, 0, 23144, 256, 0, 23148, 256, 0, 23156, 256, 0, 23160, 256, 0, 23164, 256, 0, 25040, 16, 0, 25056, 16, 0, 25072, 16, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 26240, 85, 0, 26240, 85, 0, 26240, 85, 0, 26240, 85, 0, 26816, 21845, 0, 26816, 21845, 0, 26816, 21845, 0, 26816, 21845, 0, 26816, 21845, 0, 26816, 21845, 0, 26816, 21845, 0, 26816, 21845, 0, 1856, 8192, 0, 7296, 2048, 0, 13056, 128, 0, 14272, 2, 0, 15488, 512, 0, 16832, 32, 0, 19216, 1024, 0, 19232, 1024, 0, 19248, 1024, 0, 20116, 16, 0, 20120, 16, 0, 20124, 16, 0, 20132, 16, 0, 20136, 16, 0, 20140, 16, 0, 20148, 16, 0, 20152, 16, 0, 20156, 16, 0, 23124, 256, 0, 23128, 256, 0, 23132, 256, 0, 23140, 256, 0, 23144, 256, 0, 23148, 256, 0, 23156, 256, 0, 23160, 256, 0, 23164, 256, 0, 25040, 16, 0, 25056, 16, 0, 25072, 16, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 26240, 85, 0, 26240, 85, 0, 26240, 85, 0, 26240, 85, 0, 26816, 21845, 0, 26816, 21845, 0, 26816, 21845, 0, 26816, 21845, 0, 26816, 21845, 0, 26816, 21845, 0, 26816, 21845, 0, 26816, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: 
+ Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578768406267909_563_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578768406267909_563_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bec951f4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578768406267909_563_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,161 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((66 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((85 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 512, 0, 3024, 2048, 0, 3040, 2048, 0, 4240, 4, 0, 4244, 4, 0, 4248, 4, 0, 4256, 4, 0, 4260, 4, 0, 4264, 4, 0, 5456, 512, 0, 5460, 512, 0, 5464, 512, 0, 5472, 512, 0, 5476, 512, 0, 5480, 512, 0, 7744, 73, 0, 7744, 73, 0, 7744, 73, 0, 8320, 1040, 0, 8320, 1040, 0, 8640, 18724, 0, 8640, 18724, 0, 8640, 18724, 0, 8640, 18724, 0, 8640, 18724, 0, 2112, 512, 0, 3024, 2048, 0, 3040, 2048, 0, 4240, 4, 0, 4244, 4, 0, 4248, 4, 0, 4256, 4, 0, 4260, 4, 0, 4264, 4, 0, 5456, 512, 0, 5460, 512, 0, 5464, 512, 0, 5472, 512, 0, 5476, 512, 0, 5480, 512, 0, 7744, 73, 0, 7744, 73, 0, 7744, 73, 0, 8320, 1040, 0, 8320, 1040, 0, 8640, 18724, 0, 8640, 18724, 0, 8640, 18724, 0, 8640, 18724, 0, 8640, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578810095279787_565_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578810095279787_565_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e413e438 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578810095279787_565_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,340 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result 
+ WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 13)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() 
+ 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((226 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + if ((counter3 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3264, 1, 0, 3280, 1, 0, 3296, 1, 0, 4480, 17, 0, 4480, 17, 0, 7632, 2, 0, 7648, 2, 0, 8336, 2, 0, 8352, 2, 0, 10384, 1024, 0, 12560, 16960, 0, 12560, 16960, 0, 12560, 16960, 0, 17216, 8194, 0, 17216, 8194, 0, 17536, 18468, 0, 17536, 18468, 0, 17536, 18468, 0, 17536, 18468, 0, 3264, 1, 0, 3280, 1, 0, 3296, 1, 0, 4480, 17, 0, 4480, 17, 0, 7632, 2, 0, 7648, 2, 0, 8336, 2, 0, 8352, 2, 0, 10384, 1024, 0, 12560, 16960, 0, 12560, 16960, 0, 12560, 16960, 0, 17216, 8194, 0, 17216, 8194, 0, 17536, 18468, 0, 17536, 18468, 0, 17536, 18468, 0, 17536, 18468, 0] + - 
Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578816554503947_566_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578816554503947_566_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..76118844 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578816554503947_566_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,172 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 10)) { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch 
((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1472, 43008, 0, 1472, 43008, 0, 1472, 43008, 0, 4992, 34816, 0, 4992, 34816, 0, 5824, 40960, 0, 5824, 40960, 0, 6400, 682, 0, 6400, 682, 0, 6400, 682, 0, 6400, 682, 0, 6400, 682, 0, 6400, 43008, 0, 6400, 43008, 0, 6400, 43008, 0, 1472, 43008, 0, 1472, 43008, 0, 1472, 43008, 0, 4992, 34816, 0, 4992, 34816, 0, 5824, 40960, 0, 5824, 40960, 0, 6400, 682, 0, 6400, 682, 0, 6400, 682, 0, 6400, 682, 0, 6400, 682, 0, 6400, 43008, 0, 6400, 43008, 0, 6400, 43008, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578816697475538_567_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578816697475538_567_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a1319ba1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578816697475538_567_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,236 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 204 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4608, 17476, 0, 4608, 17476, 0, 4608, 17476, 0, 4608, 17476, 0, 5056, 34952, 0, 5056, 34952, 0, 5056, 34952, 0, 5056, 34952, 0, 8128, 16419, 0, 8128, 16419, 0, 8128, 16419, 0, 8128, 16419, 0, 7872, 43656, 0, 7872, 43656, 0, 7872, 43656, 0, 7872, 43656, 0, 7872, 43656, 0, 7872, 43656, 0, 7616, 5460, 0, 7616, 5460, 0, 7616, 5460, 0, 7616, 5460, 0, 7616, 5460, 0, 7616, 5460, 0, 11392, 32896, 0, 11392, 32896, 0, 11008, 16680, 0, 11008, 16680, 0, 11008, 16680, 0, 11008, 16680, 0, 10752, 4160, 0, 10752, 4160, 0, 576, 17, 0, 576, 17, 0, 4608, 17476, 0, 4608, 17476, 0, 4608, 17476, 0, 4608, 17476, 0, 5056, 34952, 0, 5056, 34952, 0, 5056, 34952, 0, 5056, 34952, 0, 8128, 16419, 0, 8128, 16419, 0, 8128, 16419, 0, 8128, 16419, 0, 7872, 43656, 0, 7872, 43656, 0, 7872, 43656, 0, 7872, 43656, 0, 7872, 43656, 0, 7872, 43656, 0, 7616, 5460, 0, 7616, 5460, 0, 7616, 5460, 0, 7616, 5460, 0, 7616, 5460, 0, 7616, 5460, 0, 11392, 32896, 0, 11392, 32896, 0, 11008, 16680, 0, 11008, 16680, 0, 11008, 16680, 0, 11008, 16680, 0, 10752, 4160, 0, 10752, 4160, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578817156802020_568_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578817156802020_568_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c6edd398 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578817156802020_568_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,185 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((96 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((107 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if 
((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((114 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((121 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((128 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 582 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1344, 1040, 0, 1344, 1040, 0, 1664, 16644, 0, 1664, 16644, 0, 1664, 16644, 0, 3392, 8, 0, 3408, 8, 0, 3424, 8, 0, 4480, 42, 0, 4480, 42, 0, 4480, 42, 0, 4484, 42, 0, 4484, 42, 0, 
4484, 42, 0, 4488, 42, 0, 4488, 42, 0, 4488, 42, 0, 4496, 42, 0, 4496, 42, 0, 4496, 42, 0, 4500, 42, 0, 4500, 42, 0, 4500, 42, 0, 4504, 42, 0, 4504, 42, 0, 4504, 42, 0, 4512, 42, 0, 4512, 42, 0, 4512, 42, 0, 4516, 42, 0, 4516, 42, 0, 4516, 42, 0, 4520, 42, 0, 4520, 42, 0, 4520, 42, 0, 6848, 128, 0, 6852, 128, 0, 6856, 128, 0, 6864, 128, 0, 6868, 128, 0, 6872, 128, 0, 6880, 128, 0, 6884, 128, 0, 6888, 128, 0, 7744, 40, 0, 7744, 40, 0, 7748, 40, 0, 7748, 40, 0, 7752, 40, 0, 7752, 40, 0, 7760, 40, 0, 7760, 40, 0, 7764, 40, 0, 7764, 40, 0, 7768, 40, 0, 7768, 40, 0, 7776, 40, 0, 7776, 40, 0, 7780, 40, 0, 7780, 40, 0, 7784, 40, 0, 7784, 40, 0, 8192, 43008, 0, 8192, 43008, 0, 8192, 43008, 0, 8196, 43008, 0, 8196, 43008, 0, 8196, 43008, 0, 8200, 43008, 0, 8200, 43008, 0, 8200, 43008, 0, 8208, 43008, 0, 8208, 43008, 0, 8208, 43008, 0, 8212, 43008, 0, 8212, 43008, 0, 8212, 43008, 0, 8216, 43008, 0, 8216, 43008, 0, 8216, 43008, 0, 8224, 43008, 0, 8224, 43008, 0, 8224, 43008, 0, 8228, 43008, 0, 8228, 43008, 0, 8228, 43008, 0, 8232, 43008, 0, 8232, 43008, 0, 8232, 43008, 0, 9408, 40, 0, 9408, 40, 0, 9424, 40, 0, 9424, 40, 0, 9440, 40, 0, 9440, 40, 0, 768, 65, 0, 768, 65, 0, 1344, 1040, 0, 1344, 1040, 0, 1664, 16644, 0, 1664, 16644, 0, 1664, 16644, 0, 3392, 8, 0, 3408, 8, 0, 3424, 8, 0, 4480, 42, 0, 4480, 42, 0, 4480, 42, 0, 4484, 42, 0, 4484, 42, 0, 4484, 42, 0, 4488, 42, 0, 4488, 42, 0, 4488, 42, 0, 4496, 42, 0, 4496, 42, 0, 4496, 42, 0, 4500, 42, 0, 4500, 42, 0, 4500, 42, 0, 4504, 42, 0, 4504, 42, 0, 4504, 42, 0, 4512, 42, 0, 4512, 42, 0, 4512, 42, 0, 4516, 42, 0, 4516, 42, 0, 4516, 42, 0, 4520, 42, 0, 4520, 42, 0, 4520, 42, 0, 6848, 128, 0, 6852, 128, 0, 6856, 128, 0, 6864, 128, 0, 6868, 128, 0, 6872, 128, 0, 6880, 128, 0, 6884, 128, 0, 6888, 128, 0, 7744, 40, 0, 7744, 40, 0, 7748, 40, 0, 7748, 40, 0, 7752, 40, 0, 7752, 40, 0, 7760, 40, 0, 7760, 40, 0, 7764, 40, 0, 7764, 40, 0, 7768, 40, 0, 7768, 40, 0, 7776, 40, 0, 7776, 40, 0, 7780, 40, 0, 7780, 40, 0, 7784, 40, 0, 7784, 
40, 0, 8192, 43008, 0, 8192, 43008, 0, 8192, 43008, 0, 8196, 43008, 0, 8196, 43008, 0, 8196, 43008, 0, 8200, 43008, 0, 8200, 43008, 0, 8200, 43008, 0, 8208, 43008, 0, 8208, 43008, 0, 8208, 43008, 0, 8212, 43008, 0, 8212, 43008, 0, 8212, 43008, 0, 8216, 43008, 0, 8216, 43008, 0, 8216, 43008, 0, 8224, 43008, 0, 8224, 43008, 0, 8224, 43008, 0, 8228, 43008, 0, 8228, 43008, 0, 8228, 43008, 0, 8232, 43008, 0, 8232, 43008, 0, 8232, 43008, 0, 9408, 40, 0, 9408, 40, 0, 9424, 40, 0, 9424, 40, 0, 9440, 40, 0, 9440, 40, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578845454592820_570_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578845454592820_570_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dfcb48ab --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578845454592820_570_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,278 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 2))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 
13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((83 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + 
counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((183 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((190 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 
3)) { + counter4 = (counter4 + 1); + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((262 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((285 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter4 == 2)) { + break; + } + } + } + case 1: { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((332 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((347 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((356 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 2)) { + break; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1104, 57347, 0, 1104, 57347, 0, 1104, 57347, 0, 1104, 57347, 0, 1104, 57347, 0, 3408, 8, 0, 8528, 63491, 0, 8528, 63491, 0, 8528, 63491, 0, 8528, 63491, 0, 8528, 63491, 0, 8528, 63491, 0, 8528, 63491, 0, 8528, 4, 0, 10816, 4096, 0, 10832, 4096, 0, 10848, 4096, 0, 12164, 1, 0, 12168, 1, 0, 12180, 1, 0, 12184, 1, 0, 12196, 1, 0, 12200, 1, 0, 13504, 1, 0, 13520, 1, 0, 13536, 1, 0, 14272, 1040, 0, 14272, 1040, 0, 14592, 16644, 0, 14592, 16644, 0, 14592, 16644, 0, 1104, 57347, 0, 1104, 57347, 0, 1104, 57347, 0, 1104, 57347, 0, 1104, 57347, 0, 3408, 8, 0, 8528, 63491, 0, 8528, 63491, 0, 8528, 63491, 0, 8528, 63491, 0, 8528, 63491, 0, 8528, 63491, 0, 8528, 63491, 0, 8528, 4, 0, 10816, 4096, 0, 10832, 4096, 0, 10848, 4096, 0, 12164, 1, 0, 12168, 1, 0, 12180, 1, 0, 12184, 1, 0, 12196, 1, 0, 12200, 1, 0, 13504, 1, 0, 13520, 1, 0, 13536, 1, 0, 14272, 1040, 0, 14272, 1040, 0, 14592, 16644, 0, 14592, 16644, 0, 14592, 16644, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578854355189341_572_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578854355189341_572_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f8bada2a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578854355189341_572_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,177 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } 
+ if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2560, 16448, 0, 2560, 16448, 0, 4800, 17, 0, 4800, 17, 0, 5696, 1024, 0, 2560, 16448, 0, 2560, 16448, 0, 4800, 17, 0, 4800, 17, 0, 5696, 1024, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578895773997409_575_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578895773997409_575_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5da3d646 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578895773997409_575_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,196 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((39 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 0))) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((88 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((95 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 61441, 0, 1088, 61441, 0, 1088, 61441, 0, 1088, 61441, 0, 1088, 61441, 0, 2512, 1, 0, 2516, 
1, 0, 2528, 1, 0, 2532, 1, 0, 5200, 8192, 0, 5204, 8192, 0, 5216, 8192, 0, 5220, 8192, 0, 5648, 1, 0, 5652, 1, 0, 5664, 1, 0, 5668, 1, 0, 6096, 57344, 0, 6096, 57344, 0, 6096, 57344, 0, 6100, 57344, 0, 6100, 57344, 0, 6100, 57344, 0, 6112, 57344, 0, 6112, 57344, 0, 6112, 57344, 0, 6116, 57344, 0, 6116, 57344, 0, 6116, 57344, 0, 7696, 16384, 0, 7712, 16384, 0, 9216, 256, 0, 9232, 256, 0, 9664, 256, 0, 9680, 256, 0, 10944, 85, 0, 10944, 85, 0, 10944, 85, 0, 10944, 85, 0, 1088, 61441, 0, 1088, 61441, 0, 1088, 61441, 0, 1088, 61441, 0, 1088, 61441, 0, 2512, 1, 0, 2516, 1, 0, 2528, 1, 0, 2532, 1, 0, 5200, 8192, 0, 5204, 8192, 0, 5216, 8192, 0, 5220, 8192, 0, 5648, 1, 0, 5652, 1, 0, 5664, 1, 0, 5668, 1, 0, 6096, 57344, 0, 6096, 57344, 0, 6096, 57344, 0, 6100, 57344, 0, 6100, 57344, 0, 6100, 57344, 0, 6112, 57344, 0, 6112, 57344, 0, 6112, 57344, 0, 6116, 57344, 0, 6116, 57344, 0, 6116, 57344, 0, 7696, 16384, 0, 7712, 16384, 0, 9216, 256, 0, 9232, 256, 0, 9664, 256, 0, 9680, 256, 0, 10944, 85, 0, 10944, 85, 0, 10944, 85, 0, 10944, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578898897087024_576_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578898897087024_576_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..241d86fd --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578898897087024_576_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,263 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if ((WaveGetLaneIndex() == 1)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 12)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((212 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((231 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml 
+--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 7552, 24576, 0, 7552, 24576, 0, 10176, 512, 0, 10624, 34952, 0, 10624, 34952, 0, 10624, 34952, 0, 10624, 34952, 0, 11776, 49155, 0, 11776, 49155, 0, 11776, 49155, 0, 11776, 49155, 0, 12416, 49152, 0, 12416, 49152, 0, 13584, 49152, 0, 13584, 49152, 0, 13600, 49152, 0, 13600, 49152, 0, 13616, 49152, 0, 13616, 49152, 0, 14800, 49152, 0, 14800, 49152, 0, 14804, 49152, 0, 14804, 49152, 0, 14816, 49152, 0, 14816, 49152, 0, 14820, 49152, 0, 14820, 49152, 0, 14832, 49152, 0, 14832, 49152, 0, 14836, 49152, 0, 14836, 49152, 0, 15504, 49152, 0, 15504, 49152, 0, 15520, 49152, 0, 15520, 49152, 0, 15536, 49152, 0, 15536, 49152, 0, 576, 17, 0, 576, 17, 0, 7552, 24576, 0, 7552, 24576, 0, 10176, 512, 0, 10624, 34952, 0, 10624, 34952, 0, 10624, 34952, 0, 10624, 34952, 0, 11776, 49155, 0, 11776, 49155, 0, 11776, 49155, 0, 11776, 49155, 0, 12416, 49152, 0, 12416, 49152, 0, 13584, 49152, 0, 13584, 49152, 0, 13600, 49152, 0, 13600, 49152, 0, 13616, 49152, 0, 13616, 49152, 0, 14800, 49152, 0, 14800, 49152, 0, 14804, 49152, 0, 14804, 49152, 0, 14816, 49152, 0, 14816, 49152, 0, 14820, 49152, 0, 14820, 49152, 0, 14832, 49152, 0, 14832, 49152, 0, 14836, 49152, 0, 14836, 49152, 0, 15504, 49152, 0, 15504, 49152, 0, 15520, 49152, 0, 15520, 49152, 0, 15536, 49152, 0, 15536, 49152, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578916555035004_578_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578916555035004_578_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..eba2abea --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578916555035004_578_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,111 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + 
WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 270 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 264, 0, 1152, 264, 0, 1168, 264, 0, 1168, 264, 0, 1184, 264, 0, 1184, 264, 0, 3648, 128, 0, 3664, 128, 0, 3680, 128, 0, 4864, 512, 0, 4880, 512, 0, 4896, 512, 0, 5440, 43690, 0, 5440, 43690, 0, 5440, 43690, 0, 5440, 43690, 0, 5440, 43690, 0, 5440, 43690, 0, 5440, 43690, 0, 5440, 43690, 0, 5456, 43690, 0, 5456, 43690, 0, 5456, 43690, 0, 5456, 43690, 0, 5456, 43690, 0, 5456, 43690, 0, 5456, 43690, 0, 5456, 43690, 0, 5472, 43690, 0, 5472, 43690, 0, 5472, 43690, 0, 5472, 43690, 0, 5472, 43690, 0, 5472, 43690, 0, 5472, 43690, 0, 5472, 43690, 0, 6400, 1156, 0, 6400, 1156, 0, 6400, 1156, 0, 6416, 1156, 0, 6416, 1156, 0, 6416, 1156, 0, 6432, 1156, 0, 6432, 1156, 0, 6432, 1156, 
0, 1152, 264, 0, 1152, 264, 0, 1168, 264, 0, 1168, 264, 0, 1184, 264, 0, 1184, 264, 0, 3648, 128, 0, 3664, 128, 0, 3680, 128, 0, 4864, 512, 0, 4880, 512, 0, 4896, 512, 0, 5440, 43690, 0, 5440, 43690, 0, 5440, 43690, 0, 5440, 43690, 0, 5440, 43690, 0, 5440, 43690, 0, 5440, 43690, 0, 5440, 43690, 0, 5456, 43690, 0, 5456, 43690, 0, 5456, 43690, 0, 5456, 43690, 0, 5456, 43690, 0, 5456, 43690, 0, 5456, 43690, 0, 5456, 43690, 0, 5472, 43690, 0, 5472, 43690, 0, 5472, 43690, 0, 5472, 43690, 0, 5472, 43690, 0, 5472, 43690, 0, 5472, 43690, 0, 5472, 43690, 0, 6400, 1156, 0, 6400, 1156, 0, 6400, 1156, 0, 6416, 1156, 0, 6416, 1156, 0, 6416, 1156, 0, 6432, 1156, 0, 6432, 1156, 0, 6432, 1156, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578918184118000_579_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578918184118000_579_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bc7d9dff --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578918184118000_579_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,147 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (counter0 << 4)) | (i1 << 2)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4352, 17476, 0, 4352, 17476, 0, 4352, 17476, 0, 4352, 17476, 0, 4800, 52428, 0, 4800, 52428, 0, 4800, 52428, 0, 4800, 52428, 0, 4800, 52428, 0, 4800, 52428, 0, 4800, 52428, 0, 4800, 52428, 0, 6016, 3, 
0, 6016, 3, 0, 6032, 3, 0, 6032, 3, 0, 6048, 3, 0, 6048, 3, 0, 576, 17, 0, 576, 17, 0, 4352, 17476, 0, 4352, 17476, 0, 4352, 17476, 0, 4352, 17476, 0, 4800, 52428, 0, 4800, 52428, 0, 4800, 52428, 0, 4800, 52428, 0, 4800, 52428, 0, 4800, 52428, 0, 4800, 52428, 0, 4800, 52428, 0, 6016, 3, 0, 6016, 3, 0, 6032, 3, 0, 6032, 3, 0, 6048, 3, 0, 6048, 3, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578918653430196_580_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578918653430196_580_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cd3c63db --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578918653430196_580_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,214 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = 
(counter1 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() 
>= 10)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 32778, 0, 1792, 32778, 0, 1792, 32778, 0, 1808, 32778, 0, 1808, 32778, 0, 1808, 32778, 0, 2944, 32768, 0, 2960, 32768, 0, 3776, 32778, 0, 3776, 32778, 0, 3776, 32778, 0, 3792, 32778, 0, 3792, 32778, 0, 3792, 32778, 0, 4480, 40960, 0, 4480, 40960, 0, 4496, 40960, 0, 4496, 40960, 0, 6480, 1284, 0, 6480, 1284, 0, 6480, 1284, 0, 6496, 1284, 0, 6496, 1284, 0, 6496, 1284, 0, 12544, 32769, 0, 12544, 32769, 0, 12288, 31744, 0, 12288, 31744, 0, 12288, 31744, 0, 12288, 31744, 0, 12288, 31744, 0, 12032, 520, 0, 12032, 520, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 32778, 0, 1792, 32778, 
0, 1792, 32778, 0, 1808, 32778, 0, 1808, 32778, 0, 1808, 32778, 0, 2944, 32768, 0, 2960, 32768, 0, 3776, 32778, 0, 3776, 32778, 0, 3776, 32778, 0, 3792, 32778, 0, 3792, 32778, 0, 3792, 32778, 0, 4480, 40960, 0, 4480, 40960, 0, 4496, 40960, 0, 4496, 40960, 0, 6480, 1284, 0, 6480, 1284, 0, 6480, 1284, 0, 6496, 1284, 0, 6496, 1284, 0, 6496, 1284, 0, 12544, 32769, 0, 12544, 32769, 0, 12288, 31744, 0, 12288, 31744, 0, 12288, 31744, 0, 12288, 31744, 0, 12288, 31744, 0, 12032, 520, 0, 12032, 520, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578938638643764_583_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578938638643764_583_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87cd25f4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578938638643764_583_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Record log: each wave-op site writes 3 uints (tag, ballot.x, ballot.y) at the slot it reserved. */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the write cursor into _participant_bit; each site advances it by 3 via InterlockedAdd. */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578938737348787_584_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578938737348787_584_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b16e19bf --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578938737348787_584_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,490 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Record log: each wave-op site writes 3 uints (tag, ballot.x, ballot.y) at the slot it reserved. */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the write cursor into _participant_bit; each site advances it by 3 via InterlockedAdd. */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((265 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (331 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (336 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (343 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() 
== 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (373 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((390 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((399 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } else { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((419 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + } + break; + } + case 3: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((439 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { 
+ if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((453 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((462 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 240 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2240, 4369, 0, 2240, 4369, 0, 2240, 4369, 0, 2240, 4369, 0, 6080, 4369, 0, 6080, 4369, 0, 6080, 4369, 0, 6080, 4369, 0, 6976, 17476, 0, 6976, 17476, 0, 6976, 17476, 0, 6976, 17476, 0, 7424, 34952, 0, 7424, 34952, 0, 7424, 34952, 0, 7424, 34952, 0, 8256, 1, 0, 11456, 16, 0, 21504, 8226, 0, 21504, 8226, 0, 21504, 8226, 0, 25536, 4, 0, 25552, 4, 0, 25568, 4, 0, 28096, 34952, 0, 28096, 34952, 0, 28096, 34952, 0, 28096, 34952, 0, 28112, 34952, 0, 28112, 34952, 0, 28112, 34952, 0, 28112, 34952, 0, 28128, 34952, 0, 28128, 34952, 0, 28128, 34952, 0, 28128, 34952, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2240, 4369, 0, 2240, 4369, 0, 2240, 4369, 0, 2240, 4369, 0, 6080, 4369, 0, 6080, 4369, 0, 6080, 4369, 0, 6080, 4369, 0, 6976, 17476, 0, 6976, 17476, 0, 6976, 17476, 0, 6976, 17476, 0, 7424, 34952, 0, 7424, 34952, 0, 7424, 34952, 0, 7424, 34952, 0, 8256, 1, 0, 11456, 16, 0, 21504, 8226, 0, 21504, 8226, 0, 21504, 8226, 0, 25536, 4, 0, 25552, 4, 0, 25568, 4, 0, 28096, 
34952, 0, 28096, 34952, 0, 28096, 34952, 0, 28096, 34952, 0, 28112, 34952, 0, 28112, 34952, 0, 28112, 34952, 0, 28112, 34952, 0, 28128, 34952, 0, 28128, 34952, 0, 28128, 34952, 0, 28128, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578942779775177_585_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578942779775177_585_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..adafe7f4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578942779775177_585_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,150 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Record log: each wave-op site writes 3 uints (tag, ballot.x, ballot.y) at the slot it reserved. */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the write cursor into _participant_bit; each site advances it by 3 via InterlockedAdd. */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 10))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 
8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 5776, 34048, 0, 5776, 34048, 0, 5776, 34048, 0, 5792, 34048, 0, 5792, 34048, 0, 5792, 34048, 0, 5808, 34048, 0, 5808, 34048, 0, 5808, 34048, 0, 6976, 256, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 5776, 34048, 0, 5776, 34048, 0, 5776, 34048, 0, 5792, 34048, 0, 5792, 34048, 0, 5792, 34048, 0, 5808, 34048, 0, 5808, 34048, 0, 5808, 34048, 0, 6976, 256, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578943048784104_586_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578943048784104_586_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d7ebec43 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578943048784104_586_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,151 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Record log: each wave-op site writes 3 uints (tag, ballot.x, ballot.y) at the slot it reserved. */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the write cursor into _participant_bit; each site advances it by 3 via InterlockedAdd. */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + 
Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6272, 256, 0, 7168, 17476, 0, 7168, 17476, 0, 7168, 17476, 0, 7168, 17476, 0, 7616, 34952, 0, 7616, 34952, 0, 7616, 34952, 0, 7616, 34952, 0, 6272, 256, 0, 7168, 17476, 0, 7168, 17476, 0, 7168, 17476, 0, 7168, 17476, 0, 7616, 34952, 0, 7616, 34952, 0, 7616, 34952, 0, 7616, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578943241228071_587_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578943241228071_587_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..17278a1b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578943241228071_587_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578943353736284_588_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578943353736284_588_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fa8d3e22 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578943353736284_588_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,133 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 17, 0, 1792, 17, 0, 2688, 17476, 0, 2688, 17476, 0, 2688, 17476, 0, 2688, 17476, 0, 3136, 34952, 0, 3136, 34952, 0, 3136, 34952, 0, 3136, 34952, 0, 576, 85, 0, 576, 85, 0, 
576, 85, 0, 576, 85, 0, 1792, 17, 0, 1792, 17, 0, 2688, 17476, 0, 2688, 17476, 0, 2688, 17476, 0, 2688, 17476, 0, 3136, 34952, 0, 3136, 34952, 0, 3136, 34952, 0, 3136, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578943546427375_589_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578943546427375_589_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1e00f2e8 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578943546427375_589_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,553 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((150 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((268 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (315 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 7))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (349 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((365 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((374 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (393 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if 
((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (419 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((435 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (443 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (476 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 11))) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((499 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((524 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (539 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (558 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((574 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { 
+ if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (583 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (588 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (598 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (616 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (627 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (632 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 330 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 2832, 4, 0, 2848, 4, 0, 10320, 4, 0, 10336, 4, 0, 11792, 256, 0, 11808, 256, 0, 12864, 73, 0, 12864, 73, 0, 12864, 73, 0, 13440, 4161, 0, 13440, 4161, 0, 13440, 4161, 0, 23376, 1024, 0, 23392, 1024, 0, 23408, 1024, 0, 23952, 1024, 0, 23968, 1024, 0, 23984, 1024, 0, 28352, 18724, 0, 28352, 18724, 0, 28352, 18724, 0, 28352, 18724, 0, 28352, 18724, 0, 35712, 4113, 0, 35712, 4113, 0, 35712, 4113, 0, 37312, 4369, 0, 37312, 4369, 0, 37312, 4369, 0, 37312, 4369, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 38272, 73, 0, 38272, 73, 0, 38272, 73, 0, 40448, 18724, 0, 40448, 18724, 0, 40448, 18724, 0, 40448, 18724, 0, 40448, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 2832, 4, 0, 2848, 4, 0, 10320, 4, 0, 10336, 4, 0, 11792, 256, 0, 11808, 256, 0, 12864, 73, 0, 12864, 73, 0, 12864, 73, 0, 13440, 4161, 0, 13440, 4161, 0, 13440, 4161, 0, 23376, 1024, 0, 23392, 1024, 0, 23408, 1024, 0, 23952, 1024, 0, 23968, 1024, 0, 23984, 1024, 0, 28352, 18724, 0, 28352, 18724, 0, 28352, 18724, 0, 28352, 18724, 0, 28352, 18724, 0, 35712, 4113, 0, 35712, 4113, 0, 35712, 4113, 0, 37312, 4369, 0, 37312, 4369, 0, 37312, 4369, 0, 37312, 4369, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 30583, 0, 37632, 
30583, 0, 38272, 73, 0, 38272, 73, 0, 38272, 73, 0, 40448, 18724, 0, 40448, 18724, 0, 40448, 18724, 0, 40448, 18724, 0, 40448, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578961013797852_591_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578961013797852_591_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..59be111f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578961013797852_591_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,194 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + 
switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 4736, 18724, 0, 4736, 18724, 0, 4736, 18724, 0, 
4736, 18724, 0, 4736, 18724, 0, 5632, 85, 0, 5632, 85, 0, 5632, 85, 0, 5632, 85, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 4736, 18724, 0, 4736, 18724, 0, 4736, 18724, 0, 4736, 18724, 0, 4736, 18724, 0, 5632, 85, 0, 5632, 85, 0, 5632, 85, 0, 5632, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578961274598047_592_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578961274598047_592_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..76fe0d43 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578961274598047_592_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,148 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1344, 1040, 0, 1344, 1040, 0, 1664, 16644, 0, 1664, 16644, 0, 1664, 16644, 0, 3136, 85, 0, 3136, 85, 0, 3136, 85, 0, 3136, 85, 0, 768, 65, 0, 768, 65, 0, 1344, 1040, 0, 1344, 1040, 0, 1664, 16644, 0, 1664, 16644, 0, 1664, 16644, 0, 3136, 85, 0, 3136, 85, 0, 3136, 85, 0, 3136, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578961480141008_593_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578961480141008_593_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..78ab387a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578961480141008_593_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,269 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((136 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((155 << 6) 
| (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((160 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((counter0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 73, 0, 1216, 73, 0, 1216, 73, 0, 1792, 16, 0, 2112, 36, 0, 2112, 36, 0, 3392, 21760, 0, 3392, 21760, 0, 3392, 21760, 0, 3392, 21760, 0, 4928, 43520, 0, 4928, 43520, 0, 4928, 43520, 0, 4928, 43520, 0, 5504, 43520, 0, 5504, 43520, 0, 5504, 43520, 0, 5504, 43520, 0, 1216, 73, 0, 1216, 73, 0, 1216, 73, 0, 1792, 16, 0, 2112, 36, 0, 2112, 36, 0, 3392, 21760, 0, 3392, 21760, 0, 3392, 21760, 0, 3392, 21760, 0, 4928, 43520, 0, 4928, 43520, 0, 4928, 43520, 0, 4928, 43520, 0, 5504, 43520, 
0, 5504, 43520, 0, 5504, 43520, 0, 5504, 43520, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578961837610164_594_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578961837610164_594_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c8c5f211 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578961837610164_594_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,200 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 12)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4096, 16384, 0, 4544, 127, 0, 4544, 127, 0, 4544, 
127, 0, 4544, 127, 0, 4544, 127, 0, 4544, 127, 0, 4544, 127, 0, 5952, 16, 0, 4096, 16384, 0, 4544, 127, 0, 4544, 127, 0, 4544, 127, 0, 4544, 127, 0, 4544, 127, 0, 4544, 127, 0, 4544, 127, 0, 5952, 16, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578962018443612_595_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578962018443612_595_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f5dea518 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578962018443612_595_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,179 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((67 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((77 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (((91 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((98 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 384 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1408, 16392, 0, 1408, 16392, 0, 2624, 18504, 0, 2624, 18504, 0, 2624, 18504, 0, 2624, 18504, 0, 4304, 32769, 0, 4304, 32769, 0, 4308, 32769, 0, 4308, 32769, 0, 4312, 32769, 0, 4312, 32769, 0, 4944, 17, 0, 4944, 17, 0, 4948, 17, 0, 4948, 17, 0, 4952, 17, 0, 4952, 17, 0, 5840, 17476, 0, 
5840, 17476, 0, 5840, 17476, 0, 5840, 17476, 0, 5844, 17476, 0, 5844, 17476, 0, 5844, 17476, 0, 5844, 17476, 0, 5848, 17476, 0, 5848, 17476, 0, 5848, 17476, 0, 5848, 17476, 0, 6288, 34952, 0, 6288, 34952, 0, 6288, 34952, 0, 6288, 34952, 0, 6292, 34952, 0, 6292, 34952, 0, 6292, 34952, 0, 6292, 34952, 0, 6296, 34952, 0, 6296, 34952, 0, 6296, 34952, 0, 6296, 34952, 0, 6992, 63491, 0, 6992, 63491, 0, 6992, 63491, 0, 6992, 63491, 0, 6992, 63491, 0, 6992, 63491, 0, 6992, 63491, 0, 6996, 63491, 0, 6996, 63491, 0, 6996, 63491, 0, 6996, 63491, 0, 6996, 63491, 0, 6996, 63491, 0, 6996, 63491, 0, 7000, 63491, 0, 7000, 63491, 0, 7000, 63491, 0, 7000, 63491, 0, 7000, 63491, 0, 7000, 63491, 0, 7000, 63491, 0, 7952, 1, 0, 1408, 16392, 0, 1408, 16392, 0, 2624, 18504, 0, 2624, 18504, 0, 2624, 18504, 0, 2624, 18504, 0, 4304, 32769, 0, 4304, 32769, 0, 4308, 32769, 0, 4308, 32769, 0, 4312, 32769, 0, 4312, 32769, 0, 4944, 17, 0, 4944, 17, 0, 4948, 17, 0, 4948, 17, 0, 4952, 17, 0, 4952, 17, 0, 5840, 17476, 0, 5840, 17476, 0, 5840, 17476, 0, 5840, 17476, 0, 5844, 17476, 0, 5844, 17476, 0, 5844, 17476, 0, 5844, 17476, 0, 5848, 17476, 0, 5848, 17476, 0, 5848, 17476, 0, 5848, 17476, 0, 6288, 34952, 0, 6288, 34952, 0, 6288, 34952, 0, 6288, 34952, 0, 6292, 34952, 0, 6292, 34952, 0, 6292, 34952, 0, 6292, 34952, 0, 6296, 34952, 0, 6296, 34952, 0, 6296, 34952, 0, 6296, 34952, 0, 6992, 63491, 0, 6992, 63491, 0, 6992, 63491, 0, 6992, 63491, 0, 6992, 63491, 0, 6992, 63491, 0, 6992, 63491, 0, 6996, 63491, 0, 6996, 63491, 0, 6996, 63491, 0, 6996, 63491, 0, 6996, 63491, 0, 6996, 63491, 0, 6996, 63491, 0, 7000, 63491, 0, 7000, 63491, 0, 7000, 63491, 0, 7000, 63491, 0, 7000, 63491, 0, 7000, 63491, 0, 7000, 63491, 0, 7952, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + 
Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578972472268044_598_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578972472268044_598_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e60b7b0e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578972472268044_598_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,161 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3136, 1282, 0, 3136, 1282, 0, 3136, 1282, 0, 2880, 2048, 0, 2624, 16384, 0, 2240, 512, 0, 3776, 85, 0, 3776, 85, 0, 3776, 85, 0, 3776, 85, 0, 6976, 57345, 0, 6976, 57345, 0, 6976, 57345, 0, 6976, 57345, 0, 6592, 7170, 0, 6592, 7170, 0, 6592, 7170, 0, 6592, 7170, 0, 6336, 4, 0, 3136, 1282, 0, 3136, 1282, 0, 3136, 1282, 0, 2880, 2048, 0, 2624, 16384, 0, 2240, 512, 0, 3776, 85, 0, 3776, 85, 0, 3776, 85, 0, 3776, 85, 0, 6976, 57345, 0, 6976, 57345, 0, 6976, 57345, 0, 6976, 57345, 0, 6592, 7170, 0, 6592, 7170, 0, 6592, 7170, 0, 6592, 7170, 0, 6336, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578973427339458_600_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578973427339458_600_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a79047cf --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578973427339458_600_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,168 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 5))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2624, 256, 0, 3328, 16389, 0, 3328, 16389, 0, 3328, 16389, 0, 5376, 8192, 0, 11072, 8193, 0, 11072, 8193, 0, 11776, 20492, 0, 11776, 20492, 0, 11776, 20492, 0, 11776, 20492, 0, 11776, 8193, 0, 11776, 8193, 0, 2624, 256, 0, 3328, 16389, 0, 3328, 16389, 0, 3328, 16389, 0, 5376, 8192, 0, 11072, 8193, 0, 11072, 8193, 0, 11776, 20492, 0, 11776, 20492, 0, 11776, 20492, 0, 11776, 20492, 0, 11776, 8193, 0, 11776, 8193, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756578973626712760_601_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756578973626712760_601_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..439f2f77 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756578973626712760_601_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,345 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 14))) { + if 
(((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 
0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((177 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((184 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((302 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((328 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 2)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 462 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 73, 0, 768, 73, 0, 768, 73, 0, 6912, 1040, 0, 6912, 1040, 0, 8448, 18724, 0, 8448, 18724, 0, 8448, 18724, 0, 8448, 18724, 0, 8448, 18724, 0, 10368, 20481, 0, 10368, 20481, 0, 10368, 20481, 0, 10384, 20481, 0, 10384, 20481, 0, 10384, 20481, 0, 12880, 2, 0, 12896, 2, 0, 12912, 2, 0, 18112, 1, 0, 19328, 48131, 0, 19328, 48131, 0, 19328, 48131, 0, 19328, 48131, 0, 19328, 48131, 0, 19328, 48131, 0, 19328, 48131, 0, 19344, 48131, 0, 19344, 48131, 0, 19344, 48131, 0, 19344, 48131, 0, 19344, 48131, 0, 19344, 48131, 0, 19344, 48131, 0, 19360, 48131, 0, 19360, 48131, 0, 19360, 48131, 0, 19360, 48131, 0, 19360, 48131, 0, 19360, 48131, 0, 19360, 48131, 0, 20996, 10305, 0, 20996, 10305, 0, 20996, 10305, 0, 20996, 10305, 0, 21000, 10305, 0, 21000, 10305, 0, 21000, 10305, 0, 21000, 10305, 0, 21004, 10305, 0, 21004, 10305, 0, 21004, 10305, 0, 21004, 10305, 0, 21012, 10305, 0, 21012, 10305, 0, 21012, 10305, 0, 21012, 10305, 0, 21016, 10305, 0, 21016, 10305, 0, 21016, 10305, 0, 21016, 10305, 0, 21020, 10305, 0, 21020, 10305, 0, 21020, 10305, 0, 21020, 10305, 0, 21028, 10305, 0, 21028, 10305, 0, 21028, 10305, 0, 21028, 10305, 0, 21032, 10305, 0, 21032, 10305, 0, 21032, 10305, 0, 21032, 10305, 0, 21036, 10305, 0, 21036, 10305, 0, 21036, 10305, 0, 21036, 10305, 0, 768, 73, 0, 768, 73, 0, 768, 73, 0, 6912, 1040, 0, 6912, 1040, 0, 8448, 18724, 0, 8448, 18724, 0, 8448, 18724, 0, 8448, 18724, 0, 8448, 18724, 0, 10368, 20481, 0, 10368, 20481, 0, 10368, 20481, 0, 10384, 20481, 0, 10384, 20481, 0, 10384, 20481, 0, 12880, 2, 0, 12896, 2, 0, 12912, 2, 0, 18112, 1, 0, 19328, 
48131, 0, 19328, 48131, 0, 19328, 48131, 0, 19328, 48131, 0, 19328, 48131, 0, 19328, 48131, 0, 19328, 48131, 0, 19344, 48131, 0, 19344, 48131, 0, 19344, 48131, 0, 19344, 48131, 0, 19344, 48131, 0, 19344, 48131, 0, 19344, 48131, 0, 19360, 48131, 0, 19360, 48131, 0, 19360, 48131, 0, 19360, 48131, 0, 19360, 48131, 0, 19360, 48131, 0, 19360, 48131, 0, 20996, 10305, 0, 20996, 10305, 0, 20996, 10305, 0, 20996, 10305, 0, 21000, 10305, 0, 21000, 10305, 0, 21000, 10305, 0, 21000, 10305, 0, 21004, 10305, 0, 21004, 10305, 0, 21004, 10305, 0, 21004, 10305, 0, 21012, 10305, 0, 21012, 10305, 0, 21012, 10305, 0, 21012, 10305, 0, 21016, 10305, 0, 21016, 10305, 0, 21016, 10305, 0, 21016, 10305, 0, 21020, 10305, 0, 21020, 10305, 0, 21020, 10305, 0, 21020, 10305, 0, 21028, 10305, 0, 21028, 10305, 0, 21028, 10305, 0, 21028, 10305, 0, 21032, 10305, 0, 21032, 10305, 0, 21032, 10305, 0, 21032, 10305, 0, 21036, 10305, 0, 21036, 10305, 0, 21036, 10305, 0, 21036, 10305, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579022082284647_602_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579022082284647_602_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..997a55d8 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579022082284647_602_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,73 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0, 976, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: 
+ - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579022227501545_603_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579022227501545_603_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7327f8f7 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579022227501545_603_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,406 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((78 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((85 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((104 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + 
break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((counter2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; 
+ } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 6)) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((255 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((282 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((303 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter6 == 2)) { + break; + } + } + break; + } + case 1: { + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((321 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 2)) { + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 11)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (334 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (345 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (356 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + for (uint i8 = 0; (i8 < 3); i8 = (i8 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((398 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((417 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((424 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + 
result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((431 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i8 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 348 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [11776, 17476, 0, 11776, 17476, 0, 11776, 17476, 0, 11776, 17476, 0, 16960, 17, 0, 16960, 17, 0, 6676, 4096, 0, 6680, 4096, 0, 6684, 4096, 0, 8064, 17, 0, 8064, 17, 0, 9088, 8738, 0, 9088, 8738, 0, 9088, 8738, 0, 9088, 8738, 0, 11456, 8738, 0, 11456, 8738, 0, 11456, 8738, 0, 11456, 8738, 0, 3092, 17409, 0, 3092, 17409, 0, 3092, 17409, 0, 3096, 17409, 0, 3096, 17409, 0, 3096, 17409, 0, 3100, 17409, 0, 3100, 17409, 0, 3100, 17409, 0, 4564, 21845, 0, 4564, 21845, 0, 4564, 21845, 0, 4564, 21845, 0, 4564, 21845, 0, 4564, 21845, 0, 4564, 21845, 0, 4564, 21845, 0, 4568, 21845, 0, 4568, 21845, 0, 4568, 21845, 0, 4568, 21845, 0, 4568, 21845, 0, 4568, 21845, 0, 4568, 21845, 0, 4568, 21845, 0, 4572, 21845, 0, 4572, 21845, 0, 4572, 21845, 0, 4572, 21845, 0, 4572, 21845, 0, 4572, 21845, 0, 4572, 21845, 0, 4572, 21845, 0, 27584, 136, 0, 27584, 136, 0, 27600, 136, 0, 27600, 136, 0, 27616, 136, 0, 27616, 136, 0, 11776, 17476, 0, 11776, 17476, 0, 11776, 17476, 0, 11776, 17476, 0, 16960, 17, 0, 16960, 17, 0, 6676, 4096, 0, 6680, 4096, 0, 6684, 4096, 0, 8064, 17, 0, 8064, 17, 0, 9088, 8738, 0, 9088, 8738, 0, 9088, 8738, 0, 9088, 8738, 0, 11456, 8738, 0, 11456, 8738, 0, 11456, 8738, 0, 11456, 8738, 0, 3092, 17409, 0, 3092, 17409, 0, 3092, 17409, 0, 3096, 17409, 0, 3096, 17409, 0, 3096, 17409, 0, 3100, 17409, 0, 3100, 17409, 0, 3100, 17409, 0, 4564, 21845, 0, 4564, 
21845, 0, 4564, 21845, 0, 4564, 21845, 0, 4564, 21845, 0, 4564, 21845, 0, 4564, 21845, 0, 4564, 21845, 0, 4568, 21845, 0, 4568, 21845, 0, 4568, 21845, 0, 4568, 21845, 0, 4568, 21845, 0, 4568, 21845, 0, 4568, 21845, 0, 4568, 21845, 0, 4572, 21845, 0, 4572, 21845, 0, 4572, 21845, 0, 4572, 21845, 0, 4572, 21845, 0, 4572, 21845, 0, 4572, 21845, 0, 4572, 21845, 0, 27584, 136, 0, 27584, 136, 0, 27600, 136, 0, 27600, 136, 0, 27616, 136, 0, 27616, 136, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579080155866512_606_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579080155866512_606_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e109800f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579080155866512_606_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,224 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = 
(result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + 
} + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7552, 73, 0, 7552, 73, 0, 7552, 73, 0, 8768, 1040, 0, 8768, 1040, 0, 9408, 18724, 0, 9408, 18724, 0, 9408, 18724, 0, 9408, 18724, 0, 9408, 18724, 0, 7552, 73, 0, 7552, 73, 0, 7552, 73, 0, 8768, 1040, 0, 8768, 1040, 0, 9408, 18724, 0, 9408, 18724, 0, 9408, 18724, 0, 9408, 18724, 0, 9408, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579080800014109_607_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579080800014109_607_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..68081677 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579080800014109_607_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,103 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0, 1472, 65535, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579080984943660_608_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579080984943660_608_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..04a204d2 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579080984943660_608_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (counter0 << 4)) | (i1 << 2)); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 252 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 31, 0, 1040, 31, 0, 1040, 31, 0, 1040, 31, 0, 1040, 31, 0, 1056, 31, 0, 1056, 31, 0, 1056, 31, 0, 1056, 31, 0, 1056, 31, 0, 1072, 31, 0, 1072, 31, 0, 1072, 31, 0, 1072, 31, 0, 1072, 31, 0, 2192, 3, 0, 2192, 3, 0, 2208, 3, 0, 2208, 3, 0, 2224, 3, 0, 2224, 3, 0, 3920, 1, 0, 3924, 1, 0, 3928, 1, 0, 3936, 1, 0, 3940, 1, 0, 3944, 1, 0, 3952, 1, 0, 3956, 1, 0, 3960, 1, 0, 5392, 57345, 0, 5392, 57345, 0, 5392, 57345, 0, 5392, 57345, 0, 5408, 57345, 0, 5408, 57345, 0, 5408, 57345, 0, 5408, 57345, 0, 5424, 57345, 0, 5424, 57345, 0, 5424, 57345, 0, 5424, 57345, 0, 1040, 31, 0, 1040, 31, 0, 1040, 31, 0, 1040, 31, 0, 1040, 31, 0, 1056, 31, 0, 1056, 31, 0, 1056, 31, 0, 1056, 31, 0, 1056, 31, 0, 1072, 31, 0, 1072, 31, 0, 1072, 31, 0, 1072, 31, 0, 1072, 31, 0, 2192, 3, 0, 2192, 3, 0, 2208, 3, 0, 2208, 3, 0, 2224, 3, 0, 2224, 
3, 0, 3920, 1, 0, 3924, 1, 0, 3928, 1, 0, 3936, 1, 0, 3940, 1, 0, 3944, 1, 0, 3952, 1, 0, 3956, 1, 0, 3960, 1, 0, 5392, 57345, 0, 5392, 57345, 0, 5392, 57345, 0, 5392, 57345, 0, 5408, 57345, 0, 5408, 57345, 0, 5408, 57345, 0, 5408, 57345, 0, 5424, 57345, 0, 5424, 57345, 0, 5424, 57345, 0, 5424, 57345, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579082846928648_609_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579082846928648_609_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bf0233c9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579082846928648_609_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,119 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = 
(i0 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2304, 1152, 0, 2304, 1152, 0, 2320, 1152, 0, 2320, 1152, 0, 3460, 1040, 0, 3460, 
1040, 0, 3464, 1040, 0, 3464, 1040, 0, 3468, 1040, 0, 3468, 1040, 0, 3476, 1040, 0, 3476, 1040, 0, 3480, 1040, 0, 3480, 1040, 0, 3484, 1040, 0, 3484, 1040, 0, 4736, 18724, 0, 4736, 18724, 0, 4736, 18724, 0, 4736, 18724, 0, 4736, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 2304, 1152, 0, 2304, 1152, 0, 2320, 1152, 0, 2320, 1152, 0, 3460, 1040, 0, 3460, 1040, 0, 3464, 1040, 0, 3464, 1040, 0, 3468, 1040, 0, 3468, 1040, 0, 3476, 1040, 0, 3476, 1040, 0, 3480, 1040, 0, 3480, 1040, 0, 3484, 1040, 0, 3484, 1040, 0, 4736, 18724, 0, 4736, 18724, 0, 4736, 18724, 0, 4736, 18724, 0, 4736, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579083294512339_610_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579083294512339_610_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e0fd656b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579083294512339_610_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,168 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) 
|| (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((88 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((97 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 7168, 18724, 0, 7168, 18724, 0, 7168, 18724, 0, 7168, 18724, 0, 7168, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 7168, 18724, 0, 7168, 18724, 0, 7168, 18724, 0, 7168, 18724, 0, 7168, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579083435626654_611_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579083435626654_611_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f44dde44 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579083435626654_611_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,235 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 3)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3968, 1, 0, 4992, 1, 0, 10880, 3, 0, 10880, 3, 0, 3968, 1, 0, 4992, 1, 0, 10880, 3, 0, 10880, 3, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579133804645272_614_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579133804645272_614_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bd91de3b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579133804645272_614_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,298 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 2) || 
(WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 7))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + 
result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 13)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((253 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((262 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((278 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 16, 0, 1056, 16, 0, 1072, 16, 0, 2240, 73, 0, 2240, 73, 0, 2240, 73, 0, 6352, 128, 0, 6368, 128, 0, 9472, 128, 0, 10176, 1040, 0, 10176, 1040, 0, 10816, 16, 0, 11712, 1024, 0, 12736, 18724, 0, 12736, 18724, 0, 12736, 18724, 0, 12736, 18724, 0, 12736, 18724, 0, 17808, 8192, 0, 17824, 8192, 0, 1040, 16, 0, 1056, 16, 0, 1072, 16, 0, 2240, 73, 0, 2240, 73, 0, 2240, 73, 0, 6352, 128, 0, 6368, 128, 0, 9472, 128, 0, 10176, 1040, 0, 10176, 1040, 0, 10816, 16, 0, 11712, 1024, 0, 12736, 18724, 0, 12736, 18724, 0, 12736, 18724, 0, 12736, 18724, 0, 12736, 18724, 0, 17808, 8192, 0, 17824, 8192, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + 
Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579144504915197_616_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579144504915197_616_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87dd9372 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579144504915197_616_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,292 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (counter0 << 4)) | (i1 << 2)); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((90 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((100 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((114 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((121 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((130 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: 
UInt32 + Stride: 4 + Fill: 0 + Size: 462 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1616, 1040, 0, 1616, 1040, 0, 1632, 1040, 0, 1632, 1040, 0, 1648, 1040, 0, 1648, 1040, 0, 3088, 1040, 0, 3088, 1040, 0, 3092, 1040, 0, 3092, 1040, 0, 3096, 1040, 0, 3096, 1040, 0, 3104, 1040, 0, 3104, 1040, 0, 3108, 1040, 0, 3108, 1040, 0, 3112, 1040, 0, 3112, 1040, 0, 3120, 1040, 0, 3120, 1040, 0, 3124, 1040, 0, 3124, 1040, 0, 3128, 1040, 0, 3128, 1040, 0, 3664, 1040, 0, 3664, 1040, 0, 3680, 1040, 0, 3680, 1040, 0, 3696, 1040, 0, 3696, 1040, 0, 3968, 18724, 0, 3968, 18724, 0, 3968, 18724, 0, 3968, 18724, 0, 3968, 18724, 0, 6400, 17, 0, 6400, 17, 0, 6404, 17, 0, 6404, 17, 0, 6408, 17, 0, 6408, 17, 0, 6416, 17, 0, 6416, 17, 0, 6420, 17, 0, 6420, 17, 0, 6424, 17, 0, 6424, 17, 0, 6432, 17, 0, 6432, 17, 0, 6436, 17, 0, 6436, 17, 0, 6440, 17, 0, 6440, 17, 0, 9216, 16, 0, 9232, 16, 0, 9248, 16, 0, 10112, 17476, 0, 10112, 17476, 0, 10112, 17476, 0, 10112, 17476, 0, 10560, 34952, 0, 10560, 34952, 0, 10560, 34952, 0, 10560, 34952, 0, 11200, 73, 0, 11200, 73, 0, 11200, 73, 0, 11776, 1040, 0, 11776, 1040, 0, 12096, 18724, 0, 12096, 18724, 0, 12096, 18724, 0, 12096, 18724, 0, 12096, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1616, 1040, 0, 1616, 1040, 0, 1632, 1040, 0, 1632, 1040, 0, 1648, 1040, 0, 1648, 1040, 0, 3088, 1040, 0, 3088, 1040, 0, 3092, 1040, 0, 3092, 1040, 0, 3096, 1040, 0, 3096, 1040, 0, 3104, 1040, 0, 3104, 1040, 0, 3108, 1040, 0, 3108, 1040, 0, 3112, 1040, 0, 3112, 1040, 0, 3120, 1040, 0, 3120, 1040, 0, 3124, 1040, 0, 3124, 1040, 0, 3128, 1040, 0, 3128, 1040, 0, 3664, 1040, 0, 3664, 1040, 0, 3680, 1040, 0, 3680, 1040, 0, 3696, 1040, 0, 3696, 1040, 0, 3968, 18724, 0, 3968, 18724, 0, 3968, 18724, 0, 3968, 18724, 0, 3968, 18724, 0, 6400, 17, 0, 6400, 17, 0, 6404, 17, 0, 6404, 17, 0, 6408, 17, 0, 6408, 17, 0, 6416, 17, 0, 6416, 17, 0, 6420, 17, 0, 6420, 17, 0, 6424, 17, 0, 6424, 17, 0, 6432, 17, 0, 6432, 17, 0, 6436, 
17, 0, 6436, 17, 0, 6440, 17, 0, 6440, 17, 0, 9216, 16, 0, 9232, 16, 0, 9248, 16, 0, 10112, 17476, 0, 10112, 17476, 0, 10112, 17476, 0, 10112, 17476, 0, 10560, 34952, 0, 10560, 34952, 0, 10560, 34952, 0, 10560, 34952, 0, 11200, 73, 0, 11200, 73, 0, 11200, 73, 0, 11776, 1040, 0, 11776, 1040, 0, 12096, 18724, 0, 12096, 18724, 0, 12096, 18724, 0, 12096, 18724, 0, 12096, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579207333052715_617_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579207333052715_617_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5a28ebb5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579207333052715_617_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,114 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 9)) { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 8, 0, 1856, 8192, 0, 2176, 2080, 0, 2176, 2080, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 8, 0, 1856, 8192, 0, 2176, 2080, 0, 2176, 2080, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: 
_wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579207490258300_618_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579207490258300_618_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4c75c59c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579207490258300_618_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,563 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if 
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((269 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((295 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= ((307 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((318 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (331 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((346 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (356 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (367 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (378 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (387 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((402 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((436 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((455 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((462 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (467 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (479 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((496 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(((522 << 6) | (i6 << 4)) | (counter7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((529 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (539 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 17, 0, 1088, 17, 0, 7872, 2, 0, 8192, 32, 0, 17232, 1, 0, 17248, 1, 0, 18320, 1, 0, 18336, 1, 0, 20368, 1, 0, 20384, 1, 0, 29888, 17476, 0, 29888, 17476, 0, 29888, 17476, 0, 29888, 17476, 0, 1088, 17, 0, 1088, 17, 0, 7872, 2, 0, 8192, 32, 0, 17232, 1, 0, 17248, 1, 0, 18320, 1, 0, 18336, 1, 0, 20368, 1, 0, 20384, 1, 0, 29888, 17476, 0, 29888, 17476, 0, 29888, 17476, 0, 29888, 17476, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579212097070236_619_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579212097070236_619_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..af246fab --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579212097070236_619_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,279 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 
10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + } + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((191 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = 
(result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + case 2: { + if ((WaveGetLaneIndex() == 13)) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3200, 61441, 0, 3200, 61441, 0, 3200, 61441, 0, 3200, 61441, 0, 3200, 61441, 0, 2560, 254, 0, 2560, 254, 0, 2560, 254, 0, 2560, 254, 0, 2560, 254, 0, 2560, 254, 0, 2560, 254, 0, 1920, 1280, 0, 1920, 1280, 0, 4032, 73, 0, 4032, 73, 0, 4032, 73, 0, 12228, 3, 0, 12228, 3, 0, 12232, 3, 0, 12232, 3, 0, 12244, 3, 0, 12244, 3, 0, 12248, 3, 0, 12248, 3, 0, 12672, 512, 0, 12688, 512, 0, 15488, 256, 0, 16704, 32, 0, 16720, 32, 0, 16736, 32, 0, 17152, 8, 0, 3200, 61441, 0, 3200, 61441, 0, 3200, 61441, 0, 3200, 61441, 0, 3200, 61441, 0, 2560, 254, 0, 2560, 254, 0, 2560, 254, 0, 2560, 254, 0, 2560, 254, 0, 2560, 254, 0, 2560, 254, 0, 1920, 1280, 0, 1920, 1280, 0, 4032, 73, 0, 4032, 73, 0, 4032, 73, 0, 12228, 3, 0, 12228, 3, 0, 12232, 3, 0, 12232, 3, 0, 12244, 3, 0, 12244, 3, 0, 12248, 3, 0, 12248, 3, 0, 12672, 512, 0, 12688, 512, 0, 15488, 256, 0, 16704, 32, 0, 16720, 32, 0, 16736, 32, 0, 17152, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579217980642631_620_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579217980642631_620_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a02d5b60 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579217980642631_620_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,102 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1664, 8, 0, 1680, 8, 0, 1664, 8, 0, 1680, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579218076415308_621_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579218076415308_621_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dcb4fd1e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579218076415308_621_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,126 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 5184, 16, 0, 5504, 18724, 0, 5504, 18724, 0, 5504, 18724, 0, 5504, 18724, 0, 5504, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 5184, 16, 0, 5504, 18724, 0, 5504, 18724, 0, 5504, 18724, 0, 5504, 18724, 0, 5504, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579287127493433_624_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579287127493433_624_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6ae42f3f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579287127493433_624_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,184 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 5) || 
(WaveGetLaneIndex() >= 13))) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } else { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7056, 544, 0, 7056, 544, 0, 7872, 64, 0, 8448, 1024, 0, 8768, 16388, 0, 8768, 16388, 0, 9216, 34952, 0, 9216, 34952, 0, 9216, 34952, 0, 9216, 34952, 0, 7056, 544, 0, 7056, 544, 0, 7872, 64, 0, 8448, 1024, 0, 8768, 16388, 0, 8768, 16388, 0, 9216, 34952, 0, 9216, 34952, 0, 9216, 34952, 0, 9216, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579288045684419_625_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579288045684419_625_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4709f4f9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579288045684419_625_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,456 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((57 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for 
(uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((240 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 1)) { + continue; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (292 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (308 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (327 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((354 << 6) | (i7 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i8 = 0; (i8 < 2); i8 = (i8 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((377 << 6) | (i7 << 4)) | (i8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((396 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (401 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (442 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (465 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (474 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter9 = 0; + while ((counter9 < 2)) { + counter9 = (counter9 + 1); + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((490 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((497 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 288 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1360, 33288, 0, 1360, 33288, 0, 1360, 33288, 0, 1376, 33288, 0, 1376, 33288, 0, 1376, 33288, 0, 7936, 18, 0, 7936, 18, 0, 7952, 18, 0, 7952, 
18, 0, 7968, 18, 0, 7968, 18, 0, 8256, 18724, 0, 8256, 18724, 0, 8256, 18724, 0, 8256, 18724, 0, 8256, 18724, 0, 11840, 256, 0, 11856, 256, 0, 11872, 256, 0, 12544, 4096, 0, 12560, 4096, 0, 12576, 4096, 0, 13632, 8738, 0, 13632, 8738, 0, 13632, 8738, 0, 13632, 8738, 0, 15360, 16384, 0, 15364, 16384, 0, 15376, 16384, 0, 15380, 16384, 0, 15392, 16384, 0, 15396, 16384, 0, 18688, 34952, 0, 18688, 34952, 0, 18688, 34952, 0, 18688, 34952, 0, 20928, 17, 0, 20928, 17, 0, 24128, 512, 0, 24132, 512, 0, 24144, 512, 0, 24148, 512, 0, 25664, 17476, 0, 25664, 17476, 0, 25664, 17476, 0, 25664, 17476, 0, 29760, 32768, 0, 1360, 33288, 0, 1360, 33288, 0, 1360, 33288, 0, 1376, 33288, 0, 1376, 33288, 0, 1376, 33288, 0, 7936, 18, 0, 7936, 18, 0, 7952, 18, 0, 7952, 18, 0, 7968, 18, 0, 7968, 18, 0, 8256, 18724, 0, 8256, 18724, 0, 8256, 18724, 0, 8256, 18724, 0, 8256, 18724, 0, 11840, 256, 0, 11856, 256, 0, 11872, 256, 0, 12544, 4096, 0, 12560, 4096, 0, 12576, 4096, 0, 13632, 8738, 0, 13632, 8738, 0, 13632, 8738, 0, 13632, 8738, 0, 15360, 16384, 0, 15364, 16384, 0, 15376, 16384, 0, 15380, 16384, 0, 15392, 16384, 0, 15396, 16384, 0, 18688, 34952, 0, 18688, 34952, 0, 18688, 34952, 0, 18688, 34952, 0, 20928, 17, 0, 20928, 17, 0, 24128, 512, 0, 24132, 512, 0, 24144, 512, 0, 24148, 512, 0, 25664, 17476, 0, 25664, 17476, 0, 25664, 17476, 0, 25664, 17476, 0, 29760, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579306688936861_626_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579306688936861_626_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f2a54f07 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579306688936861_626_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,216 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((158 << 6) | (i1 << 4)) | (i2 << 2)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 0))) { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((211 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 372 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2368, 16384, 0, 4304, 33408, 0, 4304, 33408, 0, 4304, 33408, 0, 4320, 33408, 0, 4320, 33408, 0, 4320, 33408, 0, 4864, 33408, 0, 4864, 33408, 0, 4864, 33408, 0, 6656, 15363, 0, 6656, 15363, 0, 6656, 15363, 0, 6656, 15363, 0, 6656, 15363, 0, 6656, 15363, 0, 8000, 14339, 0, 8000, 14339, 0, 8000, 14339, 0, 8000, 14339, 0, 8000, 14339, 0, 8016, 14339, 0, 8016, 14339, 0, 8016, 14339, 0, 8016, 14339, 0, 8016, 14339, 0, 10112, 6216, 0, 10112, 6216, 0, 10112, 6216, 0, 10112, 6216, 0, 10116, 6216, 0, 10116, 6216, 0, 10116, 6216, 0, 10116, 6216, 0, 10120, 6216, 0, 10120, 6216, 0, 10120, 6216, 0, 10120, 6216, 0, 10128, 6216, 0, 10128, 6216, 0, 10128, 6216, 0, 10128, 6216, 0, 10132, 6216, 0, 10132, 6216, 0, 10132, 6216, 0, 10132, 6216, 0, 10136, 6216, 0, 10136, 6216, 0, 10136, 6216, 0, 10136, 6216, 0, 13504, 1, 0, 13508, 1, 0, 13512, 1, 0, 13520, 1, 0, 13524, 1, 0, 13528, 1, 0, 14144, 84, 0, 14144, 84, 0, 14144, 84, 0, 14160, 84, 0, 14160, 84, 0, 14160, 84, 0, 2368, 16384, 0, 4304, 33408, 0, 4304, 33408, 0, 4304, 33408, 0, 4320, 33408, 0, 4320, 33408, 0, 4320, 33408, 0, 4864, 33408, 0, 4864, 33408, 0, 4864, 33408, 0, 6656, 15363, 
0, 6656, 15363, 0, 6656, 15363, 0, 6656, 15363, 0, 6656, 15363, 0, 6656, 15363, 0, 8000, 14339, 0, 8000, 14339, 0, 8000, 14339, 0, 8000, 14339, 0, 8000, 14339, 0, 8016, 14339, 0, 8016, 14339, 0, 8016, 14339, 0, 8016, 14339, 0, 8016, 14339, 0, 10112, 6216, 0, 10112, 6216, 0, 10112, 6216, 0, 10112, 6216, 0, 10116, 6216, 0, 10116, 6216, 0, 10116, 6216, 0, 10116, 6216, 0, 10120, 6216, 0, 10120, 6216, 0, 10120, 6216, 0, 10120, 6216, 0, 10128, 6216, 0, 10128, 6216, 0, 10128, 6216, 0, 10128, 6216, 0, 10132, 6216, 0, 10132, 6216, 0, 10132, 6216, 0, 10132, 6216, 0, 10136, 6216, 0, 10136, 6216, 0, 10136, 6216, 0, 10136, 6216, 0, 13504, 1, 0, 13508, 1, 0, 13512, 1, 0, 13520, 1, 0, 13524, 1, 0, 13528, 1, 0, 14144, 84, 0, 14144, 84, 0, 14144, 84, 0, 14160, 84, 0, 14160, 84, 0, 14160, 84, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579323676683636_627_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579323676683636_627_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..289d79f9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579323676683636_627_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,109 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } 
+ case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579323778476228_628_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579323778476228_628_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..02ff7436 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579323778476228_628_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,260 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (i0 << 4)) | (counter1 << 
2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((78 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((170 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((179 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((184 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((195 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((217 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 3780, 512, 0, 3784, 512, 0, 3788, 512, 0, 3796, 512, 0, 3800, 512, 0, 3804, 512, 0, 3812, 512, 0, 3816, 512, 0, 3820, 512, 0, 6160, 8194, 0, 6160, 8194, 0, 6176, 8194, 0, 6176, 8194, 0, 6192, 8194, 0, 6192, 8194, 0, 8016, 8192, 0, 8032, 8192, 0, 8048, 8192, 0, 8320, 32, 0, 9792, 16388, 0, 9792, 16388, 0, 9808, 16388, 0, 9808, 16388, 
0, 9824, 16388, 0, 9824, 16388, 0, 10884, 64, 0, 10900, 64, 0, 10916, 64, 0, 11460, 1024, 0, 11476, 1024, 0, 11492, 1024, 0, 11780, 16388, 0, 11780, 16388, 0, 11796, 16388, 0, 11796, 16388, 0, 11812, 16388, 0, 11812, 16388, 0, 14336, 34952, 0, 14336, 34952, 0, 14336, 34952, 0, 14336, 34952, 0, 576, 17, 0, 576, 17, 0, 3780, 512, 0, 3784, 512, 0, 3788, 512, 0, 3796, 512, 0, 3800, 512, 0, 3804, 512, 0, 3812, 512, 0, 3816, 512, 0, 3820, 512, 0, 6160, 8194, 0, 6160, 8194, 0, 6176, 8194, 0, 6176, 8194, 0, 6192, 8194, 0, 6192, 8194, 0, 8016, 8192, 0, 8032, 8192, 0, 8048, 8192, 0, 8320, 32, 0, 9792, 16388, 0, 9792, 16388, 0, 9808, 16388, 0, 9808, 16388, 0, 9824, 16388, 0, 9824, 16388, 0, 10884, 64, 0, 10900, 64, 0, 10916, 64, 0, 11460, 1024, 0, 11476, 1024, 0, 11492, 1024, 0, 11780, 16388, 0, 11780, 16388, 0, 11796, 16388, 0, 11796, 16388, 0, 11812, 16388, 0, 11812, 16388, 0, 14336, 34952, 0, 14336, 34952, 0, 14336, 34952, 0, 14336, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579326349999552_629_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579326349999552_629_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1f73af11 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579326349999552_629_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,171 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || 
(WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2688, 21845, 0, 2688, 21845, 0, 2688, 21845, 0, 2688, 21845, 0, 2688, 21845, 0, 2688, 21845, 0, 2688, 21845, 0, 2688, 21845, 0, 2432, 10, 0, 2432, 10, 0, 5440, 64, 0, 7808, 1040, 0, 7808, 1040, 0, 8128, 18724, 0, 8128, 18724, 0, 8128, 18724, 0, 8128, 18724, 0, 8128, 18724, 0, 2688, 21845, 0, 2688, 21845, 0, 2688, 21845, 0, 2688, 21845, 0, 2688, 21845, 0, 2688, 21845, 0, 2688, 21845, 0, 2688, 21845, 0, 2432, 10, 0, 2432, 10, 0, 5440, 64, 0, 7808, 1040, 0, 7808, 1040, 0, 8128, 18724, 0, 8128, 18724, 0, 8128, 18724, 0, 8128, 18724, 0, 8128, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579333903696409_631_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579333903696409_631_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2b834955 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579333903696409_631_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,163 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 8))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 6))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) 
{ + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2752, 257, 0, 2752, 257, 0, 6144, 16, 0, 6464, 16400, 0, 6464, 16400, 0, 7680, 16, 0, 8320, 73, 0, 8320, 73, 0, 8320, 73, 0, 8896, 1040, 0, 8896, 1040, 0, 9216, 18724, 0, 9216, 18724, 0, 9216, 18724, 0, 9216, 18724, 0, 9216, 18724, 0, 2752, 257, 0, 2752, 257, 0, 6144, 16, 0, 6464, 16400, 0, 6464, 16400, 0, 7680, 16, 0, 8320, 73, 0, 8320, 73, 0, 8320, 73, 0, 8896, 1040, 0, 8896, 1040, 0, 9216, 18724, 0, 9216, 18724, 0, 9216, 18724, 0, 9216, 18724, 0, 9216, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579374918606664_633_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579374918606664_633_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..00e11482 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579374918606664_633_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,146 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((34 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 468 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 21845, 0, 1216, 21845, 0, 1216, 21845, 0, 1216, 21845, 0, 1216, 21845, 0, 1216, 21845, 0, 1216, 
21845, 0, 1216, 21845, 0, 1232, 21845, 0, 1232, 21845, 0, 1232, 21845, 0, 1232, 21845, 0, 1232, 21845, 0, 1232, 21845, 0, 1232, 21845, 0, 1232, 21845, 0, 1248, 21845, 0, 1248, 21845, 0, 1248, 21845, 0, 1248, 21845, 0, 1248, 21845, 0, 1248, 21845, 0, 1248, 21845, 0, 1248, 21845, 0, 2176, 21504, 0, 2176, 21504, 0, 2176, 21504, 0, 2180, 21504, 0, 2180, 21504, 0, 2180, 21504, 0, 2184, 21504, 0, 2184, 21504, 0, 2184, 21504, 0, 2192, 21504, 0, 2192, 21504, 0, 2192, 21504, 0, 2196, 21504, 0, 2196, 21504, 0, 2196, 21504, 0, 2200, 21504, 0, 2200, 21504, 0, 2200, 21504, 0, 2208, 21504, 0, 2208, 21504, 0, 2208, 21504, 0, 2212, 21504, 0, 2212, 21504, 0, 2212, 21504, 0, 2216, 21504, 0, 2216, 21504, 0, 2216, 21504, 0, 2624, 21504, 0, 2624, 21504, 0, 2624, 21504, 0, 2628, 21504, 0, 2628, 21504, 0, 2628, 21504, 0, 2632, 21504, 0, 2632, 21504, 0, 2632, 21504, 0, 2640, 21504, 0, 2640, 21504, 0, 2640, 21504, 0, 2644, 21504, 0, 2644, 21504, 0, 2644, 21504, 0, 2648, 21504, 0, 2648, 21504, 0, 2648, 21504, 0, 2656, 21504, 0, 2656, 21504, 0, 2656, 21504, 0, 2660, 21504, 0, 2660, 21504, 0, 2660, 21504, 0, 2664, 21504, 0, 2664, 21504, 0, 2664, 21504, 0, 1216, 21845, 0, 1216, 21845, 0, 1216, 21845, 0, 1216, 21845, 0, 1216, 21845, 0, 1216, 21845, 0, 1216, 21845, 0, 1216, 21845, 0, 1232, 21845, 0, 1232, 21845, 0, 1232, 21845, 0, 1232, 21845, 0, 1232, 21845, 0, 1232, 21845, 0, 1232, 21845, 0, 1232, 21845, 0, 1248, 21845, 0, 1248, 21845, 0, 1248, 21845, 0, 1248, 21845, 0, 1248, 21845, 0, 1248, 21845, 0, 1248, 21845, 0, 1248, 21845, 0, 2176, 21504, 0, 2176, 21504, 0, 2176, 21504, 0, 2180, 21504, 0, 2180, 21504, 0, 2180, 21504, 0, 2184, 21504, 0, 2184, 21504, 0, 2184, 21504, 0, 2192, 21504, 0, 2192, 21504, 0, 2192, 21504, 0, 2196, 21504, 0, 2196, 21504, 0, 2196, 21504, 0, 2200, 21504, 0, 2200, 21504, 0, 2200, 21504, 0, 2208, 21504, 0, 2208, 21504, 0, 2208, 21504, 0, 2212, 21504, 0, 2212, 21504, 0, 2212, 21504, 0, 2216, 21504, 0, 2216, 21504, 0, 2216, 21504, 0, 2624, 21504, 0, 2624, 21504, 0, 2624, 
21504, 0, 2628, 21504, 0, 2628, 21504, 0, 2628, 21504, 0, 2632, 21504, 0, 2632, 21504, 0, 2632, 21504, 0, 2640, 21504, 0, 2640, 21504, 0, 2640, 21504, 0, 2644, 21504, 0, 2644, 21504, 0, 2644, 21504, 0, 2648, 21504, 0, 2648, 21504, 0, 2648, 21504, 0, 2656, 21504, 0, 2656, 21504, 0, 2656, 21504, 0, 2660, 21504, 0, 2660, 21504, 0, 2660, 21504, 0, 2664, 21504, 0, 2664, 21504, 0, 2664, 21504, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579377725200251_634_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579377725200251_634_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..34b3e467 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579377725200251_634_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,334 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if 
(((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() >= 12)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(((78 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((141 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((148 << 6) | (counter2 << 4)) | 
(counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | 
(counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2240, 4, 0, 1984, 64539, 0, 1984, 64539, 0, 1984, 64539, 0, 1984, 64539, 0, 1984, 64539, 0, 1984, 64539, 0, 1984, 64539, 0, 1984, 64539, 0, 1984, 64539, 0, 1984, 64539, 0, 1728, 32, 0, 7552, 4, 0, 9044, 256, 0, 9048, 256, 0, 9060, 256, 0, 9064, 256, 0, 16256, 34952, 0, 16256, 34952, 0, 16256, 34952, 0, 16256, 34952, 0, 16896, 85, 0, 16896, 85, 0, 16896, 85, 0, 16896, 85, 0, 2240, 4, 0, 1984, 64539, 0, 1984, 64539, 0, 1984, 64539, 0, 1984, 64539, 0, 1984, 64539, 0, 1984, 64539, 0, 1984, 64539, 0, 1984, 64539, 0, 1984, 64539, 0, 1984, 64539, 0, 1728, 32, 0, 7552, 4, 0, 9044, 256, 0, 9048, 256, 0, 9060, 256, 0, 9064, 256, 0, 16256, 34952, 0, 16256, 34952, 0, 16256, 34952, 0, 16256, 34952, 0, 16896, 85, 0, 16896, 85, 0, 16896, 85, 0, 16896, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579381888695425_635_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579381888695425_635_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2a799bd9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579381888695425_635_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,100 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 1026, 0, 1152, 1026, 0, 1168, 1026, 0, 1168, 1026, 0, 5504, 2312, 0, 5504, 2312, 0, 5504, 2312, 0, 5520, 2312, 0, 5520, 2312, 0, 5520, 2312, 0, 1152, 1026, 0, 1152, 1026, 0, 1168, 1026, 0, 1168, 1026, 0, 5504, 2312, 0, 5504, 2312, 0, 5504, 2312, 0, 5520, 2312, 0, 5520, 2312, 0, 5520, 2312, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579382273527564_636_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579382273527564_636_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1bf75686 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579382273527564_636_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,387 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((162 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((177 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((232 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((241 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((246 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((253 << 6) | (i5 << 4)) | (counter6 << 2)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((262 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 9)) { + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((296 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((305 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (315 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + case 1: { + for (uint i8 = 0; (i8 < 3); i8 = (i8 + 1)) { + for (uint i9 = 0; (i9 < 2); i9 = (i9 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((342 << 6) | (i8 << 4)) | (i9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((353 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (358 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 528 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 1792, 5, 0, 1792, 5, 0, 1808, 5, 0, 1808, 5, 0, 3072, 5, 0, 3072, 5, 0, 3076, 5, 0, 3076, 5, 0, 3088, 5, 0, 3088, 5, 0, 3092, 5, 0, 3092, 5, 0, 3968, 16384, 0, 3984, 16384, 0, 5184, 73, 0, 5184, 73, 0, 5184, 73, 0, 6912, 8264, 0, 6912, 8264, 0, 6912, 8264, 0, 6928, 8264, 0, 6928, 8264, 0, 6928, 8264, 0, 7424, 18724, 0, 7424, 18724, 0, 7424, 18724, 0, 7424, 18724, 0, 7424, 18724, 0, 
8704, 4369, 0, 8704, 4369, 0, 8704, 4369, 0, 8704, 4369, 0, 8720, 4369, 0, 8720, 4369, 0, 8720, 4369, 0, 8720, 4369, 0, 8736, 4369, 0, 8736, 4369, 0, 8736, 4369, 0, 8736, 4369, 0, 10368, 1, 0, 10372, 1, 0, 10376, 1, 0, 10384, 1, 0, 10388, 1, 0, 10392, 1, 0, 10400, 1, 0, 10404, 1, 0, 10408, 1, 0, 15748, 17476, 0, 15748, 17476, 0, 15748, 17476, 0, 15748, 17476, 0, 15752, 17476, 0, 15752, 17476, 0, 15752, 17476, 0, 15752, 17476, 0, 15764, 17476, 0, 15764, 17476, 0, 15764, 17476, 0, 15764, 17476, 0, 15768, 17476, 0, 15768, 17476, 0, 15768, 17476, 0, 15768, 17476, 0, 18944, 32768, 0, 18960, 32768, 0, 20160, 32768, 0, 21888, 1024, 0, 21892, 1024, 0, 21904, 1024, 0, 21908, 1024, 0, 21920, 1024, 0, 21924, 1024, 0, 22592, 1024, 0, 22608, 1024, 0, 22624, 1024, 0, 22912, 18436, 0, 22912, 18436, 0, 22912, 18436, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 1792, 5, 0, 1792, 5, 0, 1808, 5, 0, 1808, 5, 0, 3072, 5, 0, 3072, 5, 0, 3076, 5, 0, 3076, 5, 0, 3088, 5, 0, 3088, 5, 0, 3092, 5, 0, 3092, 5, 0, 3968, 16384, 0, 3984, 16384, 0, 5184, 73, 0, 5184, 73, 0, 5184, 73, 0, 6912, 8264, 0, 6912, 8264, 0, 6912, 8264, 0, 6928, 8264, 0, 6928, 8264, 0, 6928, 8264, 0, 7424, 18724, 0, 7424, 18724, 0, 7424, 18724, 0, 7424, 18724, 0, 7424, 18724, 0, 8704, 4369, 0, 8704, 4369, 0, 8704, 4369, 0, 8704, 4369, 0, 8720, 4369, 0, 8720, 4369, 0, 8720, 4369, 0, 8720, 4369, 0, 8736, 4369, 0, 8736, 4369, 0, 8736, 4369, 0, 8736, 4369, 0, 10368, 1, 0, 10372, 1, 0, 10376, 1, 0, 10384, 1, 0, 10388, 1, 0, 10392, 1, 0, 10400, 1, 0, 10404, 1, 0, 10408, 1, 0, 15748, 17476, 0, 15748, 17476, 0, 15748, 17476, 0, 15748, 17476, 0, 15752, 17476, 0, 15752, 17476, 0, 15752, 17476, 0, 15752, 17476, 0, 15764, 17476, 0, 15764, 17476, 0, 15764, 17476, 0, 15764, 17476, 0, 15768, 17476, 0, 15768, 17476, 0, 15768, 17476, 0, 15768, 17476, 0, 18944, 32768, 0, 18960, 32768, 0, 20160, 32768, 0, 21888, 1024, 0, 21892, 1024, 0, 21904, 1024, 0, 21908, 
1024, 0, 21920, 1024, 0, 21924, 1024, 0, 22592, 1024, 0, 22608, 1024, 0, 22624, 1024, 0, 22912, 18436, 0, 22912, 18436, 0, 22912, 18436, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579423680797379_637_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579423680797379_637_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..711d5a18 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579423680797379_637_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,313 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 
2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((180 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((214 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((233 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + continue; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 4)) { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((265 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 300 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 2112, 8, 0, 8256, 2048, 0, 10368, 43690, 0, 10368, 43690, 0, 10368, 43690, 0, 10368, 43690, 0, 10368, 43690, 0, 10368, 43690, 0, 10368, 43690, 0, 10368, 43690, 0, 10384, 43690, 0, 10384, 43690, 0, 10384, 43690, 0, 10384, 43690, 0, 10384, 43690, 
0, 10384, 43690, 0, 10384, 43690, 0, 10384, 43690, 0, 11524, 640, 0, 11524, 640, 0, 11528, 640, 0, 11528, 640, 0, 11532, 640, 0, 11532, 640, 0, 11540, 640, 0, 11540, 640, 0, 11544, 640, 0, 11544, 640, 0, 11548, 640, 0, 11548, 640, 0, 15744, 17, 0, 15744, 17, 0, 17280, 26214, 0, 17280, 26214, 0, 17280, 26214, 0, 17280, 26214, 0, 17280, 26214, 0, 17280, 26214, 0, 17280, 26214, 0, 17280, 26214, 0, 17728, 34952, 0, 17728, 34952, 0, 17728, 34952, 0, 17728, 34952, 0, 576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 2112, 8, 0, 8256, 2048, 0, 10368, 43690, 0, 10368, 43690, 0, 10368, 43690, 0, 10368, 43690, 0, 10368, 43690, 0, 10368, 43690, 0, 10368, 43690, 0, 10368, 43690, 0, 10384, 43690, 0, 10384, 43690, 0, 10384, 43690, 0, 10384, 43690, 0, 10384, 43690, 0, 10384, 43690, 0, 10384, 43690, 0, 10384, 43690, 0, 11524, 640, 0, 11524, 640, 0, 11528, 640, 0, 11528, 640, 0, 11532, 640, 0, 11532, 640, 0, 11540, 640, 0, 11540, 640, 0, 11544, 640, 0, 11544, 640, 0, 11548, 640, 0, 11548, 640, 0, 15744, 17, 0, 15744, 17, 0, 17280, 26214, 0, 17280, 26214, 0, 17280, 26214, 0, 17280, 26214, 0, 17280, 26214, 0, 17280, 26214, 0, 17280, 26214, 0, 17280, 26214, 0, 17728, 34952, 0, 17728, 34952, 0, 17728, 34952, 0, 17728, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579430888450391_639_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579430888450391_639_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..097a9d2a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579430888450391_639_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,179 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 3328, 16388, 0, 3328, 16388, 0, 5952, 34952, 0, 5952, 34952, 0, 5952, 34952, 0, 5952, 34952, 0, 7168, 43690, 0, 7168, 43690, 0, 7168, 43690, 0, 7168, 43690, 0, 7168, 43690, 0, 7168, 43690, 0, 7168, 43690, 0, 7168, 43690, 0, 8592, 160, 0, 8592, 160, 0, 8608, 160, 0, 8608, 160, 0, 8624, 160, 0, 8624, 160, 0, 576, 17, 0, 576, 17, 0, 3328, 16388, 0, 3328, 16388, 0, 5952, 34952, 0, 5952, 34952, 0, 5952, 34952, 0, 5952, 34952, 0, 7168, 43690, 0, 7168, 43690, 0, 7168, 43690, 0, 7168, 43690, 0, 7168, 43690, 0, 7168, 43690, 0, 7168, 43690, 0, 7168, 43690, 0, 8592, 160, 0, 8592, 160, 0, 8608, 160, 0, 8608, 160, 0, 8624, 160, 0, 8624, 160, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579431377857679_640_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579431377857679_640_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..61619608 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579431377857679_640_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,108 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 16389, 0, 1344, 16389, 0, 1344, 16389, 0, 1360, 16389, 0, 1360, 16389, 0, 1360, 16389, 0, 2756, 4, 0, 2760, 4, 0, 2764, 4, 0, 2772, 4, 0, 2776, 4, 0, 2780, 4, 0, 1344, 16389, 0, 1344, 16389, 0, 1344, 16389, 0, 1360, 16389, 0, 1360, 16389, 0, 1360, 16389, 0, 2756, 4, 0, 2760, 4, 0, 2764, 4, 0, 2772, 4, 0, 2776, 4, 0, 2780, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579431885889535_641_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579431885889535_641_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c3bb895f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579431885889535_641_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,278 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
(((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 8)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + result = 
(result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((191 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2128, 18, 0, 2128, 18, 0, 
2144, 18, 0, 2144, 18, 0, 2752, 18724, 0, 2752, 18724, 0, 2752, 18724, 0, 2752, 18724, 0, 2752, 18724, 0, 3584, 1, 0, 13760, 34952, 0, 13760, 34952, 0, 13760, 34952, 0, 13760, 34952, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 2128, 18, 0, 2128, 18, 0, 2144, 18, 0, 2144, 18, 0, 2752, 18724, 0, 2752, 18724, 0, 2752, 18724, 0, 2752, 18724, 0, 2752, 18724, 0, 3584, 1, 0, 13760, 34952, 0, 13760, 34952, 0, 13760, 34952, 0, 13760, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579432398576271_642_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579432398576271_642_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dba7c46c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579432398576271_642_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,260 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || 
(WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if 
((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((119 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((138 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + 
if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 13))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((226 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((235 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((258 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2304, 1, 0, 2320, 1, 0, 5312, 1, 0, 5328, 1, 0, 9344, 16644, 0, 9344, 16644, 0, 9344, 16644, 0, 2304, 1, 0, 2320, 1, 0, 5312, 1, 0, 5328, 1, 0, 9344, 16644, 0, 9344, 16644, 0, 9344, 16644, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579434303342249_643_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579434303342249_643_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8a784d38 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579434303342249_643_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,261 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 
1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 5))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5824, 46592, 0, 5824, 46592, 0, 5824, 46592, 0, 5824, 46592, 0, 5824, 46592, 0, 5840, 46592, 0, 5840, 46592, 0, 5840, 46592, 0, 5840, 46592, 0, 5840, 46592, 0, 7168, 4, 0, 8128, 4, 0, 8144, 4, 0, 9216, 17, 0, 9216, 17, 0, 10112, 17476, 0, 10112, 17476, 0, 10112, 17476, 0, 10112, 17476, 0, 10752, 8, 0, 11712, 2048, 0, 5824, 46592, 0, 5824, 46592, 0, 5824, 46592, 0, 5824, 46592, 0, 5824, 46592, 0, 5840, 46592, 0, 5840, 46592, 0, 5840, 46592, 0, 5840, 46592, 0, 5840, 46592, 0, 7168, 4, 0, 8128, 4, 0, 8144, 4, 0, 9216, 17, 0, 9216, 17, 0, 10112, 17476, 0, 10112, 17476, 0, 10112, 17476, 0, 10112, 17476, 0, 10752, 8, 0, 11712, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579439073090164_644_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579439073090164_644_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9c4bc60a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579439073090164_644_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,207 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of 3-uint records: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free slot in _participant_bit; each record advances it by 3 */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if 
((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((191 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((200 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (counter1 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 516 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 4992, 2080, 0, 4992, 2080, 0, 5008, 2080, 0, 5008, 2080, 0, 6544, 43690, 0, 6544, 43690, 0, 6544, 43690, 0, 6544, 43690, 0, 6544, 43690, 0, 6544, 43690, 0, 6544, 43690, 0, 6544, 43690, 0, 6560, 43690, 0, 6560, 43690, 0, 6560, 43690, 0, 6560, 43690, 0, 6560, 43690, 0, 6560, 43690, 0, 6560, 43690, 0, 6560, 43690, 0, 10256, 43690, 0, 10256, 43690, 0, 10256, 43690, 0, 10256, 43690, 0, 10256, 43690, 0, 10256, 43690, 0, 10256, 43690, 0, 10256, 43690, 0, 10272, 43690, 0, 10272, 43690, 0, 10272, 43690, 0, 10272, 43690, 0, 10272, 43690, 0, 10272, 43690, 0, 10272, 43690, 0, 10272, 43690, 0, 11088, 16405, 0, 11088, 16405, 0, 11088, 16405, 0, 11088, 16405, 0, 11104, 16405, 0, 11104, 16405, 0, 11104, 16405, 0, 11104, 16405, 0, 12244, 21845, 0, 12244, 21845, 0, 12244, 21845, 0, 12244, 21845, 0, 12244, 21845, 0, 12244, 21845, 0, 12244, 21845, 0, 12244, 21845, 0, 12248, 21845, 0, 12248, 21845, 0, 12248, 21845, 0, 12248, 21845, 0, 12248, 21845, 0, 12248, 21845, 0, 12248, 21845, 0, 12248, 21845, 0, 12260, 21845, 0, 12260, 21845, 0, 12260, 21845, 0, 12260, 21845, 0, 12260, 21845, 0, 12260, 21845, 0, 12260, 21845, 0, 12260, 21845, 0, 12264, 21845, 0, 12264, 21845, 0, 12264, 21845, 0, 12264, 21845, 0, 12264, 21845, 0, 12264, 21845, 0, 12264, 21845, 0, 12264, 21845, 0, 13264, 1024, 0, 13280, 1024, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 4992, 2080, 0, 4992, 
2080, 0, 5008, 2080, 0, 5008, 2080, 0, 6544, 43690, 0, 6544, 43690, 0, 6544, 43690, 0, 6544, 43690, 0, 6544, 43690, 0, 6544, 43690, 0, 6544, 43690, 0, 6544, 43690, 0, 6560, 43690, 0, 6560, 43690, 0, 6560, 43690, 0, 6560, 43690, 0, 6560, 43690, 0, 6560, 43690, 0, 6560, 43690, 0, 6560, 43690, 0, 10256, 43690, 0, 10256, 43690, 0, 10256, 43690, 0, 10256, 43690, 0, 10256, 43690, 0, 10256, 43690, 0, 10256, 43690, 0, 10256, 43690, 0, 10272, 43690, 0, 10272, 43690, 0, 10272, 43690, 0, 10272, 43690, 0, 10272, 43690, 0, 10272, 43690, 0, 10272, 43690, 0, 10272, 43690, 0, 11088, 16405, 0, 11088, 16405, 0, 11088, 16405, 0, 11088, 16405, 0, 11104, 16405, 0, 11104, 16405, 0, 11104, 16405, 0, 11104, 16405, 0, 12244, 21845, 0, 12244, 21845, 0, 12244, 21845, 0, 12244, 21845, 0, 12244, 21845, 0, 12244, 21845, 0, 12244, 21845, 0, 12244, 21845, 0, 12248, 21845, 0, 12248, 21845, 0, 12248, 21845, 0, 12248, 21845, 0, 12248, 21845, 0, 12248, 21845, 0, 12248, 21845, 0, 12248, 21845, 0, 12260, 21845, 0, 12260, 21845, 0, 12260, 21845, 0, 12260, 21845, 0, 12260, 21845, 0, 12260, 21845, 0, 12260, 21845, 0, 12260, 21845, 0, 12264, 21845, 0, 12264, 21845, 0, 12264, 21845, 0, 12264, 21845, 0, 12264, 21845, 0, 12264, 21845, 0, 12264, 21845, 0, 12264, 21845, 0, 13264, 1024, 0, 13280, 1024, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579685049365801_646_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579685049365801_646_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6043a0d9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579685049365801_646_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,180 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of 3-uint records: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free slot in _participant_bit; each record advances it by 3 */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 
6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2752, 32768, 0, 2768, 32768, 0, 4224, 520, 0, 4224, 520, 0, 4240, 520, 0, 4240, 520, 0, 6784, 2080, 0, 6784, 2080, 0, 6800, 2080, 0, 6800, 2080, 0, 7424, 85, 0, 7424, 85, 0, 7424, 85, 0, 7424, 85, 0, 8064, 8, 0, 8960, 2080, 0, 8960, 2080, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2752, 32768, 0, 2768, 32768, 0, 4224, 520, 0, 4224, 520, 0, 4240, 520, 0, 4240, 520, 0, 6784, 2080, 0, 6784, 2080, 0, 6800, 2080, 0, 6800, 2080, 0, 7424, 85, 0, 7424, 85, 0, 7424, 85, 0, 7424, 85, 0, 8064, 8, 0, 8960, 2080, 0, 8960, 2080, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579707667234547_649_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579707667234547_649_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ddd9d6a0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579707667234547_649_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,365 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of 3-uint records: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free slot in _participant_bit; each record advances it by 3 */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 1))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((184 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((194 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((203 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + 
result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((208 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((219 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((282 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((293 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (344 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (363 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (367 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 61447, 0, 1088, 61447, 0, 1088, 61447, 0, 1088, 61447, 0, 1088, 61447, 0, 1088, 61447, 0, 1088, 61447, 0, 3264, 1, 0, 11776, 8192, 0, 11780, 8192, 0, 11792, 8192, 0, 11796, 8192, 0, 13312, 32, 0, 13316, 32, 0, 13328, 32, 0, 13332, 32, 0, 14016, 8192, 0, 14020, 8192, 0, 14032, 8192, 0, 14036, 8192, 0, 14528, 17476, 0, 14528, 17476, 0, 14528, 17476, 0, 14528, 17476, 0, 14976, 34952, 0, 14976, 34952, 0, 14976, 34952, 0, 14976, 34952, 0, 15616, 17, 0, 15616, 17, 0, 16192, 4369, 0, 16192, 4369, 0, 16192, 4369, 0, 16192, 4369, 0, 16832, 4, 0, 18048, 4, 0, 18064, 4, 0, 18080, 4, 0, 18752, 4, 0, 18768, 4, 0, 18784, 4, 0, 1088, 61447, 0, 1088, 61447, 0, 1088, 61447, 0, 1088, 61447, 0, 1088, 61447, 0, 1088, 61447, 0, 1088, 61447, 0, 3264, 1, 0, 11776, 8192, 0, 11780, 
8192, 0, 11792, 8192, 0, 11796, 8192, 0, 13312, 32, 0, 13316, 32, 0, 13328, 32, 0, 13332, 32, 0, 14016, 8192, 0, 14020, 8192, 0, 14032, 8192, 0, 14036, 8192, 0, 14528, 17476, 0, 14528, 17476, 0, 14528, 17476, 0, 14528, 17476, 0, 14976, 34952, 0, 14976, 34952, 0, 14976, 34952, 0, 14976, 34952, 0, 15616, 17, 0, 15616, 17, 0, 16192, 4369, 0, 16192, 4369, 0, 16192, 4369, 0, 16192, 4369, 0, 16832, 4, 0, 18048, 4, 0, 18064, 4, 0, 18080, 4, 0, 18752, 4, 0, 18768, 4, 0, 18784, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579713125703302_651_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579713125703302_651_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d86514c4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579713125703302_651_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,291 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of 3-uint records: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free slot in _participant_bit; each record advances it by 3 */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 
<< 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((254 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((269 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2368, 73, 0, 2368, 73, 0, 2368, 73, 0, 2944, 1040, 0, 2944, 1040, 0, 3264, 18724, 0, 3264, 18724, 0, 3264, 18724, 0, 3264, 18724, 0, 3264, 18724, 0, 3904, 17, 0, 3904, 17, 0, 5056, 8194, 0, 
5056, 8194, 0, 9344, 17476, 0, 9344, 17476, 0, 9344, 17476, 0, 9344, 17476, 0, 10496, 32768, 0, 11712, 32768, 0, 11728, 32768, 0, 14208, 32768, 0, 14224, 32768, 0, 17216, 8, 0, 17232, 8, 0, 17248, 8, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2368, 73, 0, 2368, 73, 0, 2368, 73, 0, 2944, 1040, 0, 2944, 1040, 0, 3264, 18724, 0, 3264, 18724, 0, 3264, 18724, 0, 3264, 18724, 0, 3264, 18724, 0, 3904, 17, 0, 3904, 17, 0, 5056, 8194, 0, 5056, 8194, 0, 9344, 17476, 0, 9344, 17476, 0, 9344, 17476, 0, 9344, 17476, 0, 10496, 32768, 0, 11712, 32768, 0, 11728, 32768, 0, 14208, 32768, 0, 14224, 32768, 0, 17216, 8, 0, 17232, 8, 0, 17248, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579714390359343_652_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579714390359343_652_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ab6473f8 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579714390359343_652_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,152 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((57 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((73 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((88 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 456 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2452, 5284, 0, 2452, 5284, 0, 2452, 5284, 0, 2452, 5284, 0, 2452, 5284, 0, 2456, 5284, 0, 2456, 5284, 0, 2456, 5284, 0, 2456, 5284, 0, 2456, 5284, 0, 2468, 5284, 0, 2468, 5284, 0, 2468, 5284, 0, 2468, 5284, 0, 2468, 5284, 0, 2472, 5284, 0, 2472, 5284, 0, 2472, 5284, 0, 2472, 5284, 0, 2472, 5284, 0, 3092, 17, 0, 3092, 17, 0, 3096, 17, 0, 3096, 17, 0, 3108, 17, 0, 3108, 17, 0, 3112, 17, 0, 3112, 17, 0, 3988, 17476, 0, 3988, 17476, 0, 3988, 17476, 0, 3988, 17476, 0, 3992, 17476, 0, 3992, 17476, 0, 3992, 17476, 0, 3992, 17476, 0, 4004, 17476, 0, 4004, 17476, 0, 4004, 17476, 0, 4004, 17476, 0, 4008, 17476, 0, 4008, 17476, 0, 4008, 17476, 0, 4008, 17476, 0, 4436, 34952, 0, 4436, 34952, 0, 4436, 34952, 0, 4436, 34952, 0, 4440, 34952, 0, 4440, 34952, 0, 4440, 34952, 0, 4440, 34952, 0, 4452, 34952, 0, 4452, 34952, 0, 4452, 34952, 0, 4452, 34952, 0, 4456, 34952, 0, 4456, 34952, 0, 4456, 34952, 0, 4456, 34952, 0, 5652, 24608, 0, 5652, 24608, 0, 5652, 24608, 0, 5656, 24608, 0, 5656, 24608, 0, 5656, 24608, 0, 5668, 24608, 0, 5668, 24608, 0, 5668, 24608, 0, 5672, 24608, 0, 5672, 24608, 0, 5672, 24608, 0, 6096, 3, 0, 6096, 3, 0, 6112, 3, 0, 6112, 3, 0, 2452, 5284, 0, 2452, 5284, 0, 2452, 5284, 0, 2452, 5284, 0, 2452, 5284, 0, 2456, 5284, 0, 2456, 5284, 0, 2456, 5284, 0, 2456, 5284, 0, 2456, 5284, 0, 2468, 5284, 0, 2468, 5284, 0, 2468, 5284, 0, 2468, 5284, 0, 2468, 5284, 0, 2472, 5284, 0, 2472, 5284, 0, 2472, 5284, 0, 2472, 5284, 0, 2472, 5284, 0, 3092, 17, 0, 3092, 17, 0, 3096, 17, 0, 3096, 17, 0, 3108, 17, 0, 3108, 17, 0, 3112, 17, 0, 3112, 17, 0, 3988, 17476, 0, 3988, 17476, 0, 3988, 17476, 0, 3988, 17476, 0, 3992, 17476, 0, 3992, 17476, 0, 3992, 17476, 0, 3992, 
17476, 0, 4004, 17476, 0, 4004, 17476, 0, 4004, 17476, 0, 4004, 17476, 0, 4008, 17476, 0, 4008, 17476, 0, 4008, 17476, 0, 4008, 17476, 0, 4436, 34952, 0, 4436, 34952, 0, 4436, 34952, 0, 4436, 34952, 0, 4440, 34952, 0, 4440, 34952, 0, 4440, 34952, 0, 4440, 34952, 0, 4452, 34952, 0, 4452, 34952, 0, 4452, 34952, 0, 4452, 34952, 0, 4456, 34952, 0, 4456, 34952, 0, 4456, 34952, 0, 4456, 34952, 0, 5652, 24608, 0, 5652, 24608, 0, 5652, 24608, 0, 5656, 24608, 0, 5656, 24608, 0, 5656, 24608, 0, 5668, 24608, 0, 5668, 24608, 0, 5668, 24608, 0, 5672, 24608, 0, 5672, 24608, 0, 5672, 24608, 0, 6096, 3, 0, 6096, 3, 0, 6112, 3, 0, 6112, 3, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579726996410725_653_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579726996410725_653_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8a228ddf --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579726996410725_653_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,280 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { 
+ result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 7))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 4))) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 204 + - Name: expected_bit_patterns + 
Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1216, 73, 0, 1216, 73, 0, 1216, 73, 0, 3328, 1040, 0, 3328, 1040, 0, 3344, 1040, 0, 3344, 1040, 0, 3360, 1040, 0, 3360, 1040, 0, 5056, 1024, 0, 5072, 1024, 0, 5088, 1024, 0, 12032, 4, 0, 13248, 85, 0, 13248, 85, 0, 13248, 85, 0, 13248, 85, 0, 13824, 21845, 0, 13824, 21845, 0, 13824, 21845, 0, 13824, 21845, 0, 13824, 21845, 0, 13824, 21845, 0, 13824, 21845, 0, 13824, 21845, 0, 15360, 7, 0, 15360, 7, 0, 15360, 7, 0, 14976, 8712, 0, 14976, 8712, 0, 14976, 8712, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1216, 73, 0, 1216, 73, 0, 1216, 73, 0, 3328, 1040, 0, 3328, 1040, 0, 3344, 1040, 0, 3344, 1040, 0, 3360, 1040, 0, 3360, 1040, 0, 5056, 1024, 0, 5072, 1024, 0, 5088, 1024, 0, 12032, 4, 0, 13248, 85, 0, 13248, 85, 0, 13248, 85, 0, 13248, 85, 0, 13824, 21845, 0, 13824, 21845, 0, 13824, 21845, 0, 13824, 21845, 0, 13824, 21845, 0, 13824, 21845, 0, 13824, 21845, 0, 13824, 21845, 0, 15360, 7, 0, 15360, 7, 0, 15360, 7, 0, 14976, 8712, 0, 14976, 8712, 0, 14976, 8712, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579728736645669_654_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579728736645669_654_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..49eb9590 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579728736645669_654_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,476 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); 
+ if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((226 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((241 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((273 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { 
+ result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((282 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((294 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((298 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((307 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((318 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((343 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((367 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((382 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 1)) { + break; + } + } + if ((i4 == 1)) { + continue; + } + if ((i4 == 2)) { + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (400 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (410 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (431 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (450 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((468 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (482 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if 
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (497 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (502 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 420 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [9152, 4161, 0, 9152, 4161, 0, 9152, 4161, 0, 9728, 1040, 0, 9728, 1040, 0, 10896, 4, 0, 10912, 4, 0, 16848, 16384, 0, 16864, 16384, 0, 18384, 16388, 0, 18384, 16388, 0, 18400, 16388, 0, 18400, 16388, 0, 18832, 2048, 0, 18848, 2048, 0, 19664, 16384, 0, 19680, 16384, 0, 20368, 16384, 0, 20384, 16384, 0, 21952, 21509, 0, 21952, 21509, 0, 21952, 21509, 0, 21952, 21509, 0, 21952, 21509, 0, 21968, 21509, 0, 21968, 21509, 0, 21968, 21509, 0, 21968, 21509, 0, 21968, 21509, 0, 21984, 21509, 0, 21984, 21509, 0, 21984, 21509, 0, 21984, 21509, 0, 21984, 21509, 0, 23492, 4164, 0, 23492, 4164, 0, 23492, 4164, 0, 23508, 4164, 0, 23508, 4164, 0, 23508, 4164, 0, 23524, 4164, 0, 23524, 4164, 0, 23524, 4164, 0, 24452, 4164, 0, 24452, 4164, 0, 24452, 4164, 0, 24468, 4164, 0, 24468, 4164, 0, 24468, 4164, 0, 24484, 4164, 0, 24484, 4164, 0, 24484, 4164, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 26240, 73, 
0, 26240, 73, 0, 26240, 73, 0, 28800, 1026, 0, 28800, 1026, 0, 32128, 18724, 0, 32128, 18724, 0, 32128, 18724, 0, 32128, 18724, 0, 32128, 18724, 0, 9152, 4161, 0, 9152, 4161, 0, 9152, 4161, 0, 9728, 1040, 0, 9728, 1040, 0, 10896, 4, 0, 10912, 4, 0, 16848, 16384, 0, 16864, 16384, 0, 18384, 16388, 0, 18384, 16388, 0, 18400, 16388, 0, 18400, 16388, 0, 18832, 2048, 0, 18848, 2048, 0, 19664, 16384, 0, 19680, 16384, 0, 20368, 16384, 0, 20384, 16384, 0, 21952, 21509, 0, 21952, 21509, 0, 21952, 21509, 0, 21952, 21509, 0, 21952, 21509, 0, 21968, 21509, 0, 21968, 21509, 0, 21968, 21509, 0, 21968, 21509, 0, 21968, 21509, 0, 21984, 21509, 0, 21984, 21509, 0, 21984, 21509, 0, 21984, 21509, 0, 21984, 21509, 0, 23492, 4164, 0, 23492, 4164, 0, 23492, 4164, 0, 23508, 4164, 0, 23508, 4164, 0, 23508, 4164, 0, 23524, 4164, 0, 23524, 4164, 0, 23524, 4164, 0, 24452, 4164, 0, 24452, 4164, 0, 24452, 4164, 0, 24468, 4164, 0, 24468, 4164, 0, 24468, 4164, 0, 24484, 4164, 0, 24484, 4164, 0, 24484, 4164, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 25600, 21845, 0, 26240, 73, 0, 26240, 73, 0, 26240, 73, 0, 28800, 1026, 0, 28800, 1026, 0, 32128, 18724, 0, 32128, 18724, 0, 32128, 18724, 0, 32128, 18724, 0, 32128, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579754406282989_655_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579754406282989_655_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6104794a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579754406282989_655_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,219 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 8))) { 
+ result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 7))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((252 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (286 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 372 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2320, 1, 0, 2336, 1, 0, 2352, 1, 0, 3024, 1, 0, 3040, 1, 0, 3056, 1, 0, 3584, 4369, 0, 3584, 4369, 0, 3584, 4369, 0, 3584, 4369, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 7056, 16400, 0, 7056, 16400, 0, 7072, 16400, 0, 7072, 16400, 0, 7088, 16400, 0, 7088, 16400, 0, 8656, 4608, 0, 8656, 4608, 0, 8672, 4608, 0, 8672, 4608, 0, 8688, 4608, 0, 8688, 4608, 0, 10128, 16680, 0, 10128, 16680, 0, 10128, 16680, 0, 10128, 16680, 0, 10144, 16680, 0, 10144, 16680, 0, 10144, 16680, 0, 10144, 16680, 
0, 10160, 16680, 0, 10160, 16680, 0, 10160, 16680, 0, 10160, 16680, 0, 14096, 160, 0, 14096, 160, 0, 14112, 160, 0, 14112, 160, 0, 14128, 160, 0, 14128, 160, 0, 15248, 4, 0, 15264, 4, 0, 15280, 4, 0, 17360, 8320, 0, 17360, 8320, 0, 17376, 8320, 0, 17376, 8320, 0, 17392, 8320, 0, 17392, 8320, 0, 18304, 128, 0, 2320, 1, 0, 2336, 1, 0, 2352, 1, 0, 3024, 1, 0, 3040, 1, 0, 3056, 1, 0, 3584, 4369, 0, 3584, 4369, 0, 3584, 4369, 0, 3584, 4369, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 3904, 30583, 0, 7056, 16400, 0, 7056, 16400, 0, 7072, 16400, 0, 7072, 16400, 0, 7088, 16400, 0, 7088, 16400, 0, 8656, 4608, 0, 8656, 4608, 0, 8672, 4608, 0, 8672, 4608, 0, 8688, 4608, 0, 8688, 4608, 0, 10128, 16680, 0, 10128, 16680, 0, 10128, 16680, 0, 10128, 16680, 0, 10144, 16680, 0, 10144, 16680, 0, 10144, 16680, 0, 10144, 16680, 0, 10160, 16680, 0, 10160, 16680, 0, 10160, 16680, 0, 10160, 16680, 0, 14096, 160, 0, 14096, 160, 0, 14112, 160, 0, 14112, 160, 0, 14128, 160, 0, 14128, 160, 0, 15248, 4, 0, 15264, 4, 0, 15280, 4, 0, 17360, 8320, 0, 17360, 8320, 0, 17376, 8320, 0, 17376, 8320, 0, 17392, 8320, 0, 17392, 8320, 0, 18304, 128, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579772666308181_656_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579772666308181_656_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..13668b07 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579772666308181_656_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,244 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 
<< 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 15))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((207 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 2)) { + break; + } + } + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((280 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 270 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2512, 324, 0, 2512, 324, 0, 2512, 324, 0, 2528, 324, 0, 2528, 324, 0, 2528, 324, 0, 5504, 43690, 0, 5504, 43690, 0, 5504, 43690, 0, 5504, 43690, 0, 5504, 43690, 0, 5504, 43690, 0, 5504, 43690, 0, 5504, 
43690, 0, 5520, 43690, 0, 5520, 43690, 0, 5520, 43690, 0, 5520, 43690, 0, 5520, 43690, 0, 5520, 43690, 0, 5520, 43690, 0, 5520, 43690, 0, 5536, 43690, 0, 5536, 43690, 0, 5536, 43690, 0, 5536, 43690, 0, 5536, 43690, 0, 5536, 43690, 0, 5536, 43690, 0, 5536, 43690, 0, 7616, 10, 0, 7616, 10, 0, 10304, 256, 0, 10944, 73, 0, 10944, 73, 0, 10944, 73, 0, 13264, 2, 0, 13268, 2, 0, 13272, 2, 0, 13280, 2, 0, 13284, 2, 0, 13288, 2, 0, 13296, 2, 0, 13300, 2, 0, 13304, 2, 0, 2512, 324, 0, 2512, 324, 0, 2512, 324, 0, 2528, 324, 0, 2528, 324, 0, 2528, 324, 0, 5504, 43690, 0, 5504, 43690, 0, 5504, 43690, 0, 5504, 43690, 0, 5504, 43690, 0, 5504, 43690, 0, 5504, 43690, 0, 5504, 43690, 0, 5520, 43690, 0, 5520, 43690, 0, 5520, 43690, 0, 5520, 43690, 0, 5520, 43690, 0, 5520, 43690, 0, 5520, 43690, 0, 5520, 43690, 0, 5536, 43690, 0, 5536, 43690, 0, 5536, 43690, 0, 5536, 43690, 0, 5536, 43690, 0, 5536, 43690, 0, 5536, 43690, 0, 5536, 43690, 0, 7616, 10, 0, 7616, 10, 0, 10304, 256, 0, 10944, 73, 0, 10944, 73, 0, 10944, 73, 0, 13264, 2, 0, 13268, 2, 0, 13272, 2, 0, 13280, 2, 0, 13284, 2, 0, 13288, 2, 0, 13296, 2, 0, 13300, 2, 0, 13304, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579794340948733_657_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579794340948733_657_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..80b308bf --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579794340948733_657_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: 
UInt32 + Stride: 4 + Data: [1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1024, 5, 0, 1024, 5, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1664, 43690, 0, 1024, 5, 0, 1024, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579794444843652_658_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579794444843652_658_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..89d239ce --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579794444843652_658_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,580 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 10)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 0))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch 
((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((262 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((285 << 6) | 
(counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((304 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (332 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((346 << 6) | (counter1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (355 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (362 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (372 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (381 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (386 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (390 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (400 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((430 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((445 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (454 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (464 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (477 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (484 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (493 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (498 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 73, 0, 768, 73, 0, 768, 73, 0, 2240, 1040, 0, 2240, 1040, 0, 2560, 18724, 0, 2560, 18724, 0, 2560, 18724, 0, 2560, 18724, 0, 2560, 18724, 0, 16784, 32, 0, 17680, 32, 0, 18256, 32, 0, 19968, 17476, 0, 
19968, 17476, 0, 19968, 17476, 0, 19968, 17476, 0, 20608, 8, 0, 29696, 85, 0, 29696, 85, 0, 29696, 85, 0, 29696, 85, 0, 31872, 2080, 0, 31872, 2080, 0, 768, 73, 0, 768, 73, 0, 768, 73, 0, 2240, 1040, 0, 2240, 1040, 0, 2560, 18724, 0, 2560, 18724, 0, 2560, 18724, 0, 2560, 18724, 0, 2560, 18724, 0, 16784, 32, 0, 17680, 32, 0, 18256, 32, 0, 19968, 17476, 0, 19968, 17476, 0, 19968, 17476, 0, 19968, 17476, 0, 20608, 8, 0, 29696, 85, 0, 29696, 85, 0, 29696, 85, 0, 29696, 85, 0, 31872, 2080, 0, 31872, 2080, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579798838369398_659_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579798838369398_659_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..69635182 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579798838369398_659_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3264, 21845, 0, 3264, 21845, 0, 3264, 21845, 0, 3264, 21845, 0, 3264, 21845, 0, 3264, 21845, 0, 3264, 21845, 0, 3264, 21845, 0, 2624, 32800, 0, 2624, 32800, 0, 2240, 10890, 0, 2240, 10890, 0, 2240, 10890, 0, 2240, 10890, 0, 2240, 10890, 0, 2240, 10890, 0, 3264, 21845, 0, 3264, 21845, 0, 3264, 21845, 0, 3264, 21845, 0, 3264, 21845, 0, 3264, 21845, 0, 3264, 21845, 0, 3264, 21845, 0, 2624, 32800, 0, 2624, 32800, 0, 2240, 10890, 0, 2240, 10890, 0, 2240, 10890, 0, 2240, 10890, 0, 2240, 10890, 0, 2240, 10890, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579798981816709_660_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579798981816709_660_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7e80587c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579798981816709_660_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,264 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 1) 
|| (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 
4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 8))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: 
UInt32 + Stride: 4 + Data: [1296, 36873, 0, 1296, 36873, 0, 1296, 36873, 0, 1296, 36873, 0, 2384, 4096, 0, 2388, 4096, 0, 2392, 4096, 0, 3024, 73, 0, 3024, 73, 0, 3024, 73, 0, 3028, 73, 0, 3028, 73, 0, 3028, 73, 0, 3032, 73, 0, 3032, 73, 0, 3032, 73, 0, 4672, 1040, 0, 4672, 1040, 0, 4992, 18724, 0, 4992, 18724, 0, 4992, 18724, 0, 4992, 18724, 0, 4992, 18724, 0, 1296, 36873, 0, 1296, 36873, 0, 1296, 36873, 0, 1296, 36873, 0, 2384, 4096, 0, 2388, 4096, 0, 2392, 4096, 0, 3024, 73, 0, 3024, 73, 0, 3024, 73, 0, 3028, 73, 0, 3028, 73, 0, 3028, 73, 0, 3032, 73, 0, 3032, 73, 0, 3032, 73, 0, 4672, 1040, 0, 4672, 1040, 0, 4992, 18724, 0, 4992, 18724, 0, 4992, 18724, 0, 4992, 18724, 0, 4992, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579801282536914_661_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579801282536914_661_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0e58875f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579801282536914_661_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,149 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: 
Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2240, 16672, 0, 2240, 16672, 0, 2240, 16672, 0, 6976, 32, 0, 7616, 85, 0, 7616, 85, 0, 7616, 85, 0, 7616, 85, 0, 8192, 21845, 0, 8192, 21845, 0, 8192, 21845, 0, 8192, 21845, 0, 8192, 21845, 0, 8192, 21845, 0, 8192, 21845, 0, 8192, 21845, 0, 2240, 16672, 0, 2240, 16672, 0, 2240, 16672, 0, 6976, 32, 0, 7616, 85, 0, 7616, 85, 0, 7616, 85, 0, 7616, 85, 0, 8192, 21845, 0, 8192, 21845, 0, 8192, 21845, 0, 8192, 21845, 0, 8192, 21845, 0, 8192, 21845, 0, 8192, 21845, 0, 8192, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579801495964571_662_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579801495964571_662_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4431f0e7 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579801495964571_662_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,337 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 9)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((290 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 
1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 408 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4352, 2, 0, 4368, 2, 0, 5520, 17476, 0, 5520, 17476, 0, 5520, 17476, 0, 5520, 17476, 0, 5536, 17476, 0, 5536, 17476, 0, 5536, 17476, 0, 5536, 17476, 0, 5952, 34952, 0, 5952, 34952, 0, 5952, 34952, 0, 5952, 34952, 0, 6848, 17, 0, 6848, 17, 0, 8016, 8738, 0, 8016, 8738, 0, 8016, 8738, 0, 8016, 8738, 0, 8032, 8738, 0, 8032, 8738, 0, 8032, 8738, 0, 8032, 8738, 0, 11472, 8194, 0, 11472, 8194, 0, 11488, 8194, 0, 11488, 8194, 0, 12048, 4369, 0, 12048, 4369, 0, 12048, 4369, 0, 12048, 4369, 0, 12064, 4369, 0, 12064, 4369, 0, 12064, 4369, 0, 12064, 4369, 0, 13264, 17476, 0, 13264, 17476, 0, 13264, 17476, 0, 13264, 17476, 0, 13280, 17476, 0, 13280, 17476, 0, 13280, 17476, 0, 13280, 17476, 0, 14336, 34952, 0, 14336, 34952, 0, 14336, 34952, 0, 14336, 34952, 0, 14352, 34952, 0, 14352, 34952, 0, 14352, 34952, 0, 14352, 34952, 0, 17280, 32776, 0, 17280, 32776, 0, 17296, 32776, 0, 17296, 32776, 0, 17984, 32768, 0, 18000, 32768, 0, 18560, 34952, 0, 18560, 34952, 0, 18560, 34952, 0, 18560, 34952, 0, 18576, 34952, 0, 18576, 34952, 0, 18576, 34952, 0, 18576, 34952, 0, 576, 17, 0, 576, 17, 0, 4352, 2, 0, 4368, 2, 0, 5520, 17476, 0, 5520, 17476, 0, 5520, 17476, 0, 5520, 17476, 0, 5536, 17476, 0, 5536, 17476, 0, 5536, 17476, 0, 5536, 17476, 0, 5952, 34952, 0, 5952, 34952, 0, 5952, 34952, 0, 5952, 34952, 0, 6848, 17, 0, 6848, 17, 0, 8016, 8738, 0, 8016, 8738, 0, 8016, 8738, 0, 8016, 8738, 0, 8032, 8738, 0, 8032, 8738, 0, 8032, 8738, 0, 8032, 8738, 0, 11472, 8194, 0, 11472, 8194, 0, 11488, 8194, 0, 11488, 8194, 0, 12048, 4369, 0, 12048, 4369, 0, 12048, 4369, 0, 12048, 4369, 0, 12064, 4369, 0, 12064, 4369, 0, 12064, 4369, 0, 12064, 4369, 0, 13264, 17476, 0, 13264, 17476, 0, 13264, 17476, 0, 13264, 17476, 0, 13280, 17476, 0, 13280, 17476, 0, 13280, 17476, 0, 13280, 17476, 0, 
14336, 34952, 0, 14336, 34952, 0, 14336, 34952, 0, 14336, 34952, 0, 14352, 34952, 0, 14352, 34952, 0, 14352, 34952, 0, 14352, 34952, 0, 17280, 32776, 0, 17280, 32776, 0, 17296, 32776, 0, 17296, 32776, 0, 17984, 32768, 0, 18000, 32768, 0, 18560, 34952, 0, 18560, 34952, 0, 18560, 34952, 0, 18560, 34952, 0, 18576, 34952, 0, 18576, 34952, 0, 18576, 34952, 0, 18576, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579809183892945_664_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579809183892945_664_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a01b47c5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579809183892945_664_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,299 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10)) || 
(WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4288, 16, 0, 9680, 512, 0, 9696, 512, 0, 9712, 512, 0, 11968, 17476, 0, 11968, 
17476, 0, 11968, 17476, 0, 11968, 17476, 0, 12416, 34952, 0, 12416, 34952, 0, 12416, 34952, 0, 12416, 34952, 0, 13312, 85, 0, 13312, 85, 0, 13312, 85, 0, 13312, 85, 0, 15360, 43690, 0, 15360, 43690, 0, 15360, 43690, 0, 15360, 43690, 0, 15360, 43690, 0, 15360, 43690, 0, 15360, 43690, 0, 15360, 43690, 0, 16064, 43690, 0, 16064, 43690, 0, 16064, 43690, 0, 16064, 43690, 0, 16064, 43690, 0, 16064, 43690, 0, 16064, 43690, 0, 16064, 43690, 0, 17472, 3, 0, 17472, 3, 0, 17088, 64540, 0, 17088, 64540, 0, 17088, 64540, 0, 17088, 64540, 0, 17088, 64540, 0, 17088, 64540, 0, 17088, 64540, 0, 17088, 64540, 0, 17088, 64540, 0, 4288, 16, 0, 9680, 512, 0, 9696, 512, 0, 9712, 512, 0, 11968, 17476, 0, 11968, 17476, 0, 11968, 17476, 0, 11968, 17476, 0, 12416, 34952, 0, 12416, 34952, 0, 12416, 34952, 0, 12416, 34952, 0, 13312, 85, 0, 13312, 85, 0, 13312, 85, 0, 13312, 85, 0, 15360, 43690, 0, 15360, 43690, 0, 15360, 43690, 0, 15360, 43690, 0, 15360, 43690, 0, 15360, 43690, 0, 15360, 43690, 0, 15360, 43690, 0, 16064, 43690, 0, 16064, 43690, 0, 16064, 43690, 0, 16064, 43690, 0, 16064, 43690, 0, 16064, 43690, 0, 16064, 43690, 0, 16064, 43690, 0, 17472, 3, 0, 17472, 3, 0, 17088, 64540, 0, 17088, 64540, 0, 17088, 64540, 0, 17088, 64540, 0, 17088, 64540, 0, 17088, 64540, 0, 17088, 64540, 0, 17088, 64540, 0, 17088, 64540, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579810895021134_665_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579810895021134_665_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..eff1d30e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579810895021134_665_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,368 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((i2 == 1)) { + continue; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) 
== 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((275 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (294 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } 
+} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1424, 1024, 0, 2324, 128, 0, 2328, 128, 0, 2896, 1024, 0, 4928, 73, 0, 4928, 73, 0, 4928, 73, 0, 6912, 8320, 0, 6912, 8320, 0, 6928, 8320, 0, 6928, 8320, 0, 8192, 16, 0, 8208, 16, 0, 17620, 18432, 0, 17620, 18432, 0, 17624, 18432, 0, 17624, 18432, 0, 17636, 18432, 0, 17636, 18432, 0, 17640, 18432, 0, 17640, 18432, 0, 18240, 85, 0, 18240, 85, 0, 18240, 85, 0, 18240, 85, 0, 1424, 1024, 0, 2324, 128, 0, 2328, 128, 0, 2896, 1024, 0, 4928, 73, 0, 4928, 73, 0, 4928, 73, 0, 6912, 8320, 0, 6912, 8320, 0, 6928, 8320, 0, 6928, 8320, 0, 8192, 16, 0, 8208, 16, 0, 17620, 18432, 0, 17620, 18432, 0, 17624, 18432, 0, 17624, 18432, 0, 17636, 18432, 0, 17636, 18432, 0, 17640, 18432, 0, 17640, 18432, 0, 18240, 85, 0, 18240, 85, 0, 18240, 85, 0, 18240, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579829485965302_667_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579829485965302_667_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7be29515 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579829485965302_667_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,111 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Generated wave-divergence case. Each guarded wave op appends a 3-word record to _participant_bit: an id word ((op id << 6) | (loop counter << 4)), then ballot.x and ballot.y from WaveActiveBallot(1); the slot is reserved with InterlockedAdd(_wave_op_index[0], 3, temp). */ + uint result = 0; + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); /* incremented before use, so the records carry counter values 1..3 */ + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 8))) { /* never entered: the enclosing test admits only lanes 2, 5, 9 and this one only 3, 8, 14 */ + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); /* outside the loop: id word carries no counter bits */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 384 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1552, 63503, 0, 1552, 63503, 0, 1552, 63503, 0, 1552, 63503, 0, 1552, 63503, 0, 1552, 63503, 0, 1552, 63503, 0, 1552, 63503, 0, 1552, 63503, 0, 1568, 63503, 0, 1568, 63503, 0, 1568, 63503, 0, 1568, 63503, 0, 1568, 63503, 0, 1568, 63503, 0, 1568, 63503, 0, 1568, 63503, 0, 1568, 63503, 0, 1584, 63503, 0, 1584, 63503, 0, 1584, 63503, 0, 1584, 63503, 0, 1584, 63503, 0, 1584, 63503, 0, 1584, 63503, 0, 1584, 63503, 0, 1584, 63503, 0, 4176, 2056, 0, 4176, 2056, 0, 4192, 2056, 0, 4192, 2056, 0, 4208, 2056, 0, 4208, 2056, 0, 4880, 61455, 0, 4880, 61455, 0, 4880, 61455, 0, 4880, 61455, 0, 4880, 61455, 0, 4880, 61455, 0, 4880, 61455, 0, 4880, 61455, 0, 4896, 61455, 0, 4896, 61455, 0, 4896, 61455, 0, 4896, 61455, 0, 4896, 61455, 0, 4896, 61455, 0, 4896, 61455, 0, 4896, 61455, 0, 4912, 61455, 0, 4912, 61455, 
0, 4912, 61455, 0, 4912, 61455, 0, 4912, 61455, 0, 4912, 61455, 0, 4912, 61455, 0, 4912, 61455, 0, 5568, 57359, 0, 5568, 57359, 0, 5568, 57359, 0, 5568, 57359, 0, 5568, 57359, 0, 5568, 57359, 0, 5568, 57359, 0, 1552, 63503, 0, 1552, 63503, 0, 1552, 63503, 0, 1552, 63503, 0, 1552, 63503, 0, 1552, 63503, 0, 1552, 63503, 0, 1552, 63503, 0, 1552, 63503, 0, 1568, 63503, 0, 1568, 63503, 0, 1568, 63503, 0, 1568, 63503, 0, 1568, 63503, 0, 1568, 63503, 0, 1568, 63503, 0, 1568, 63503, 0, 1568, 63503, 0, 1584, 63503, 0, 1584, 63503, 0, 1584, 63503, 0, 1584, 63503, 0, 1584, 63503, 0, 1584, 63503, 0, 1584, 63503, 0, 1584, 63503, 0, 1584, 63503, 0, 4176, 2056, 0, 4176, 2056, 0, 4192, 2056, 0, 4192, 2056, 0, 4208, 2056, 0, 4208, 2056, 0, 4880, 61455, 0, 4880, 61455, 0, 4880, 61455, 0, 4880, 61455, 0, 4880, 61455, 0, 4880, 61455, 0, 4880, 61455, 0, 4880, 61455, 0, 4896, 61455, 0, 4896, 61455, 0, 4896, 61455, 0, 4896, 61455, 0, 4896, 61455, 0, 4896, 61455, 0, 4896, 61455, 0, 4896, 61455, 0, 4912, 61455, 0, 4912, 61455, 0, 4912, 61455, 0, 4912, 61455, 0, 4912, 61455, 0, 4912, 61455, 0, 4912, 61455, 0, 4912, 61455, 0, 5568, 57359, 0, 5568, 57359, 0, 5568, 57359, 0, 5568, 57359, 0, 5568, 57359, 0, 5568, 57359, 0, 5568, 57359, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579830678967902_669_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579830678967902_669_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0b57ae9a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579830678967902_669_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Generated wave-divergence case. Each guarded wave op appends a 3-word record to _participant_bit: an id word (op id << 6), then ballot.x and ballot.y from WaveActiveBallot(1); the slot is reserved with InterlockedAdd(_wave_op_index[0], 3, temp). */ + uint result = 0; + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 3))) { /* lanes 3, 6, 11: expected_bit_patterns pairs 1856 (29 << 6) with mask 2120 = bits 3, 6, 11 */ + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15))) { /* lanes 7, 15: expected_bit_patterns pairs 1472 (23 << 6) with mask 32896 = bits 7, 15 */ + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 2120, 0, 1856, 2120, 0, 1856, 2120, 0, 1472, 32896, 0, 1472, 32896, 0, 1856, 2120, 0, 1856, 2120, 0, 1856, 2120, 0, 1472, 32896, 0, 1472, 32896, 0] + - Name: _wave_op_index + 
Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579831342291239_671_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579831342291239_671_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a867c9bc --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579831342291239_671_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,195 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Generated wave-divergence case. Each guarded wave op appends a 3-word record to _participant_bit: an id word ((op id << 6) | (loop counter << 4)), then ballot.x and ballot.y from WaveActiveBallot(1); the slot is reserved with InterlockedAdd(_wave_op_index[0], 3, temp). */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { /* nested switch: only odd lanes reach this point */ + case 0: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 2)) { /* never true here: this branch is only reached by odd lanes */ + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; /* last statement of the loop body, so nothing is skipped */ + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + 
uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter2 == 2)) { + break; /* leaves the loop after the second pass although the loop condition allows three */ + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + } + uint counter4 = 0; /* executed by every lane after the outer switch */ + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1920, 32768, 0, 3088, 32768, 0, 3104, 32768, 0, 3792, 32768, 0, 3808, 32768, 0, 5312, 520, 0, 5312, 520, 0, 5328, 520, 0, 5328, 
520, 0, 10576, 32768, 0, 10592, 32768, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1920, 32768, 0, 3088, 32768, 0, 3104, 32768, 0, 3792, 32768, 0, 3808, 32768, 0, 5312, 520, 0, 5312, 520, 0, 5328, 520, 0, 5328, 520, 0, 10576, 32768, 0, 10592, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579832167738907_672_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579832167738907_672_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5921bf91 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579832167738907_672_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,124 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Generated wave-divergence case. Each guarded wave op appends a 3-word record to _participant_bit: an id word ((op id << 6) | (outer counter << 4) | (inner counter << 2)), then ballot.x and ballot.y from WaveActiveBallot(1); the slot is reserved with InterlockedAdd(_wave_op_index[0], 3, temp). */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((30 << 6) | (counter0 << 4)) | (counter1 << 2)); /* nested loop: both counters are packed into the id word */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { /* never true: lanes reaching case 1 have lane % 4 == 1, i.e. are odd; expected_bit_patterns has no 2496 (39 << 6) record */ + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 16, 0, 1056, 16, 0, 1940, 4096, 0, 1944, 4096, 0, 1956, 4096, 0, 1960, 4096, 0, 2816, 17476, 0, 2816, 17476, 0, 2816, 17476, 0, 2816, 17476, 0, 3264, 34952, 0, 3264, 34952, 0, 3264, 34952, 0, 3264, 34952, 0, 1040, 16, 0, 1056, 16, 0, 1940, 4096, 0, 1944, 4096, 0, 1956, 4096, 0, 1960, 
4096, 0, 2816, 17476, 0, 2816, 17476, 0, 2816, 17476, 0, 2816, 17476, 0, 3264, 34952, 0, 3264, 34952, 0, 3264, 34952, 0, 3264, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579832529047052_673_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579832529047052_673_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..73d0185e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579832529047052_673_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,266 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Generated wave-divergence case. Each guarded wave op appends a 3-word record to _participant_bit: an id word ((op id << 6) | (outer counter << 4) | (inner counter << 2)), then ballot.x and ballot.y from WaveActiveBallot(1); the slot is reserved with InterlockedAdd(_wave_op_index[0], 3, temp). Cases 1 and 2 of the outer switch have no break. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 12)) { + if ((WaveGetLaneIndex() < 3)) { /* never true: the enclosing branch already requires lane >= 12 */ + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { /* never true: the enclosing branch already requires lane >= 12 */ + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { /* x % 3 on a uint is always 0, 1 or 2, so this default is not reached */ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 5)) { /* never true: the enclosing branch already requires lane >= 12 */ + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || 
(WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((179 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } /* case 1 ends without break: its lanes continue into case 2 (expected data pairs 12544 = 196 << 6 with mask 8738, bits 1, 5, 9, 13) */ + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12))) { /* never true: the enclosing test admits only lanes 3, 8, 14 */ + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((218 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13))) { /* never true: the enclosing test admits only lanes 3, 8, 14 */ + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } /* case 2 ends without break: lanes from cases 1 and 2 continue into case 3 (expected data pairs 16704 = 261 << 6 with mask 61166 = 0xEEEE) */ + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 282 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [9728, 2, 0, 9744, 2, 0, 11456, 8192, 0, 11460, 8192, 0, 11464, 8192, 0, 11472, 8192, 0, 11476, 8192, 0, 11480, 8192, 0, 12544, 8738, 0, 12544, 8738, 0, 12544, 8738, 0, 12544, 8738, 0, 12560, 8738, 0, 12560, 8738, 0, 12560, 8738, 0, 12560, 8738, 0, 12576, 8738, 0, 12576, 8738, 0, 12576, 8738, 0, 12576, 8738, 0, 15680, 32, 0, 15696, 32, 0, 15712, 32, 0, 16256, 17476, 0, 16256, 17476, 0, 16256, 17476, 0, 16256, 17476, 0, 16272, 17476, 0, 16272, 17476, 0, 16272, 17476, 0, 16272, 17476, 0, 16288, 17476, 0, 16288, 17476, 0, 16288, 17476, 0, 16288, 17476, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 9728, 2, 0, 9744, 2, 0, 11456, 8192, 0, 11460, 8192, 0, 11464, 8192, 0, 11472, 8192, 0, 11476, 8192, 0, 11480, 8192, 0, 12544, 8738, 0, 12544, 8738, 0, 12544, 8738, 0, 12544, 8738, 0, 12560, 8738, 0, 12560, 8738, 0, 12560, 8738, 0, 12560, 8738, 0, 12576, 8738, 0, 12576, 8738, 0, 
12576, 8738, 0, 12576, 8738, 0, 15680, 32, 0, 15696, 32, 0, 15712, 32, 0, 16256, 17476, 0, 16256, 17476, 0, 16256, 17476, 0, 16256, 17476, 0, 16272, 17476, 0, 16272, 17476, 0, 16272, 17476, 0, 16272, 17476, 0, 16288, 17476, 0, 16288, 17476, 0, 16288, 17476, 0, 16288, 17476, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0, 16704, 61166, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579841672088029_675_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579841672088029_675_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fc402d4f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579841672088029_675_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,406 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((138 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 12)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((217 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((296 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((307 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (325 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((340 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 2)) { + break; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (347 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 168 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 1856, 8192, 0, 4112, 8192, 0, 5504, 17, 0, 5504, 17, 0, 9792, 17476, 0, 9792, 17476, 0, 9792, 17476, 0, 9792, 17476, 0, 18960, 32776, 0, 18960, 32776, 0, 18976, 32776, 0, 18976, 32776, 0, 18992, 32776, 0, 18992, 32776, 0, 19664, 32768, 0, 19680, 32768, 0, 19696, 32768, 0, 21760, 2048, 0, 21776, 2048, 0, 21792, 2048, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 1856, 8192, 0, 4112, 8192, 0, 5504, 17, 0, 5504, 17, 0, 9792, 17476, 0, 9792, 17476, 0, 9792, 17476, 0, 9792, 17476, 0, 18960, 32776, 0, 18960, 32776, 0, 18976, 32776, 0, 18976, 32776, 0, 18992, 32776, 0, 18992, 32776, 0, 19664, 32768, 0, 19680, 32768, 0, 19696, 32768, 0, 21760, 2048, 0, 21776, 2048, 0, 21792, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579842318810631_676_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579842318810631_676_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8c30cf0e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579842318810631_676_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,372 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 1))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if 
(((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((278 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((332 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (341 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (346 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (355 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (368 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (377 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (382 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (391 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 168 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 33288, 0, 1024, 33288, 0, 1024, 33288, 0, 4160, 2, 0, 4176, 2, 0, 6080, 2, 0, 6096, 2, 0, 11776, 73, 0, 11776, 73, 0, 11776, 73, 0, 14208, 8208, 0, 14208, 8208, 0, 15888, 8192, 0, 15904, 8192, 0, 15920, 8192, 0, 21824, 1024, 0, 22464, 18724, 0, 22464, 18724, 0, 22464, 18724, 0, 22464, 18724, 0, 22464, 18724, 0, 23552, 65, 0, 23552, 65, 0, 24128, 1040, 0, 24128, 1040, 0, 24448, 16644, 0, 24448, 16644, 0, 24448, 16644, 0, 1024, 33288, 0, 1024, 33288, 0, 1024, 33288, 0, 4160, 2, 0, 4176, 2, 0, 6080, 2, 0, 6096, 2, 0, 11776, 73, 0, 11776, 73, 0, 11776, 73, 0, 14208, 8208, 0, 14208, 8208, 0, 15888, 8192, 0, 15904, 8192, 0, 15920, 8192, 0, 21824, 1024, 0, 22464, 18724, 0, 22464, 18724, 0, 22464, 18724, 0, 22464, 18724, 0, 22464, 
18724, 0, 23552, 65, 0, 23552, 65, 0, 24128, 1040, 0, 24128, 1040, 0, 24448, 16644, 0, 24448, 16644, 0, 24448, 16644, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579845803076463_678_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579845803076463_678_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..caaa62d9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579845803076463_678_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,329 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 3)) { + result = 
(result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((96 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch 
((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((265 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((274 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((285 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((299 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = 
(result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((313 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((328 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 576 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6164, 2, 0, 6168, 2, 0, 6172, 2, 0, 6180, 2, 0, 6184, 2, 0, 6188, 2, 0, 8320, 8192, 0, 8336, 8192, 0, 8352, 8192, 0, 10944, 1024, 0, 10960, 1024, 0, 10976, 1024, 0, 15120, 5698, 0, 15120, 5698, 0, 15120, 5698, 0, 15120, 5698, 0, 15120, 5698, 0, 15136, 5698, 0, 15136, 5698, 0, 15136, 5698, 0, 15136, 5698, 0, 15136, 5698, 0, 15152, 5698, 0, 15152, 5698, 0, 15152, 5698, 0, 15152, 5698, 0, 15152, 5698, 0, 15760, 15, 0, 15760, 15, 0, 15760, 15, 0, 15760, 15, 0, 15776, 15, 0, 15776, 15, 0, 15776, 15, 0, 15776, 15, 0, 15792, 15, 0, 15792, 15, 0, 15792, 15, 0, 15792, 15, 0, 16400, 85, 0, 16400, 85, 0, 16400, 85, 0, 16400, 85, 0, 16416, 85, 0, 16416, 85, 0, 16416, 85, 0, 16416, 85, 0, 16432, 85, 0, 16432, 85, 0, 16432, 85, 0, 16432, 85, 0, 16976, 85, 0, 16976, 85, 0, 16976, 85, 0, 16976, 85, 0, 16992, 85, 0, 16992, 85, 0, 16992, 85, 0, 16992, 85, 0, 17008, 85, 0, 17008, 85, 0, 17008, 85, 0, 17008, 85, 0, 17552, 7, 0, 17552, 7, 0, 17552, 7, 0, 17568, 7, 0, 17568, 7, 0, 17568, 7, 0, 17584, 7, 0, 17584, 7, 0, 17584, 7, 0, 
18256, 57344, 0, 18256, 57344, 0, 18256, 57344, 0, 18272, 57344, 0, 18272, 57344, 0, 18272, 57344, 0, 18288, 57344, 0, 18288, 57344, 0, 18288, 57344, 0, 19156, 16384, 0, 19172, 16384, 0, 19188, 16384, 0, 20048, 32768, 0, 20064, 32768, 0, 20080, 32768, 0, 21008, 36992, 0, 21008, 36992, 0, 21008, 36992, 0, 21024, 36992, 0, 21024, 36992, 0, 21024, 36992, 0, 21040, 36992, 0, 21040, 36992, 0, 21040, 36992, 0, 6164, 2, 0, 6168, 2, 0, 6172, 2, 0, 6180, 2, 0, 6184, 2, 0, 6188, 2, 0, 8320, 8192, 0, 8336, 8192, 0, 8352, 8192, 0, 10944, 1024, 0, 10960, 1024, 0, 10976, 1024, 0, 15120, 5698, 0, 15120, 5698, 0, 15120, 5698, 0, 15120, 5698, 0, 15120, 5698, 0, 15136, 5698, 0, 15136, 5698, 0, 15136, 5698, 0, 15136, 5698, 0, 15136, 5698, 0, 15152, 5698, 0, 15152, 5698, 0, 15152, 5698, 0, 15152, 5698, 0, 15152, 5698, 0, 15760, 15, 0, 15760, 15, 0, 15760, 15, 0, 15760, 15, 0, 15776, 15, 0, 15776, 15, 0, 15776, 15, 0, 15776, 15, 0, 15792, 15, 0, 15792, 15, 0, 15792, 15, 0, 15792, 15, 0, 16400, 85, 0, 16400, 85, 0, 16400, 85, 0, 16400, 85, 0, 16416, 85, 0, 16416, 85, 0, 16416, 85, 0, 16416, 85, 0, 16432, 85, 0, 16432, 85, 0, 16432, 85, 0, 16432, 85, 0, 16976, 85, 0, 16976, 85, 0, 16976, 85, 0, 16976, 85, 0, 16992, 85, 0, 16992, 85, 0, 16992, 85, 0, 16992, 85, 0, 17008, 85, 0, 17008, 85, 0, 17008, 85, 0, 17008, 85, 0, 17552, 7, 0, 17552, 7, 0, 17552, 7, 0, 17568, 7, 0, 17568, 7, 0, 17568, 7, 0, 17584, 7, 0, 17584, 7, 0, 17584, 7, 0, 18256, 57344, 0, 18256, 57344, 0, 18256, 57344, 0, 18272, 57344, 0, 18272, 57344, 0, 18272, 57344, 0, 18288, 57344, 0, 18288, 57344, 0, 18288, 57344, 0, 19156, 16384, 0, 19172, 16384, 0, 19188, 16384, 0, 20048, 32768, 0, 20064, 32768, 0, 20080, 32768, 0, 21008, 36992, 0, 21008, 36992, 0, 21008, 36992, 0, 21024, 36992, 0, 21024, 36992, 0, 21024, 36992, 0, 21040, 36992, 0, 21040, 36992, 0, 21040, 36992, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 
+ Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579927815372049_679_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579927815372049_679_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0cf003a9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579927815372049_679_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,249 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((118 << 6) | (i1 << 4)) | (i2 
<< 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((127 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((138 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4880, 272, 0, 4880, 272, 0, 4896, 272, 0, 4896, 272, 0, 6912, 2, 0, 6916, 2, 0, 6920, 2, 0, 6928, 2, 0, 6932, 2, 0, 6936, 2, 0, 8832, 2, 0, 8836, 2, 0, 8840, 2, 0, 8848, 2, 0, 8852, 2, 0, 8856, 2, 0, 9920, 544, 0, 9920, 544, 
0, 9936, 544, 0, 9936, 544, 0, 10240, 17476, 0, 10240, 17476, 0, 10240, 17476, 0, 10240, 17476, 0, 4880, 272, 0, 4880, 272, 0, 4896, 272, 0, 4896, 272, 0, 6912, 2, 0, 6916, 2, 0, 6920, 2, 0, 6928, 2, 0, 6932, 2, 0, 6936, 2, 0, 8832, 2, 0, 8836, 2, 0, 8840, 2, 0, 8848, 2, 0, 8852, 2, 0, 8856, 2, 0, 9920, 544, 0, 9920, 544, 0, 9936, 544, 0, 9936, 544, 0, 10240, 17476, 0, 10240, 17476, 0, 10240, 17476, 0, 10240, 17476, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579929092372917_680_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579929092372917_680_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7bba712d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579929092372917_680_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,131 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((88 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 450 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 1, 0, 1040, 1, 0, 1056, 1, 0, 1984, 1, 0, 1988, 1, 0, 1992, 1, 0, 2000, 1, 0, 2004, 1, 0, 2008, 1, 0, 2016, 1, 0, 2020, 1, 0, 2024, 1, 0, 5184, 264, 0, 5184, 264, 0, 5188, 264, 0, 5188, 264, 0, 5192, 264, 0, 5192, 264, 0, 5200, 264, 0, 5200, 264, 0, 5204, 264, 0, 5204, 264, 0, 5208, 264, 0, 5208, 264, 0, 5216, 264, 0, 5216, 264, 0, 5220, 264, 0, 5220, 264, 0, 5224, 264, 0, 5224, 264, 0, 5632, 31, 0, 5632, 31, 0, 5632, 31, 0, 5632, 31, 0, 5632, 31, 0, 5636, 31, 0, 5636, 31, 0, 5636, 31, 0, 5636, 31, 0, 5636, 31, 0, 5640, 31, 0, 5640, 31, 0, 5640, 31, 0, 5640, 31, 0, 5640, 31, 0, 5648, 31, 0, 5648, 31, 0, 5648, 31, 0, 5648, 31, 0, 5648, 31, 0, 5652, 31, 0, 5652, 31, 0, 5652, 31, 0, 5652, 31, 0, 5652, 31, 0, 5656, 31, 0, 5656, 31, 0, 5656, 31, 0, 5656, 31, 0, 5656, 31, 0, 5664, 31, 0, 5664, 31, 0, 5664, 31, 0, 5664, 31, 0, 5664, 31, 0, 5668, 31, 0, 5668, 31, 0, 5668, 31, 0, 5668, 31, 0, 5668, 31, 0, 5672, 31, 0, 5672, 31, 0, 5672, 31, 0, 5672, 31, 0, 5672, 31, 0, 1024, 1, 0, 1040, 1, 0, 1056, 1, 0, 1984, 1, 0, 1988, 1, 0, 1992, 1, 0, 2000, 1, 0, 2004, 1, 0, 2008, 1, 0, 2016, 1, 0, 2020, 1, 0, 2024, 1, 0, 5184, 264, 0, 5184, 264, 0, 5188, 264, 0, 5188, 264, 0, 5192, 264, 0, 5192, 264, 0, 5200, 264, 0, 5200, 264, 0, 5204, 264, 0, 5204, 264, 0, 5208, 264, 0, 
5208, 264, 0, 5216, 264, 0, 5216, 264, 0, 5220, 264, 0, 5220, 264, 0, 5224, 264, 0, 5224, 264, 0, 5632, 31, 0, 5632, 31, 0, 5632, 31, 0, 5632, 31, 0, 5632, 31, 0, 5636, 31, 0, 5636, 31, 0, 5636, 31, 0, 5636, 31, 0, 5636, 31, 0, 5640, 31, 0, 5640, 31, 0, 5640, 31, 0, 5640, 31, 0, 5640, 31, 0, 5648, 31, 0, 5648, 31, 0, 5648, 31, 0, 5648, 31, 0, 5648, 31, 0, 5652, 31, 0, 5652, 31, 0, 5652, 31, 0, 5652, 31, 0, 5652, 31, 0, 5656, 31, 0, 5656, 31, 0, 5656, 31, 0, 5656, 31, 0, 5656, 31, 0, 5664, 31, 0, 5664, 31, 0, 5664, 31, 0, 5664, 31, 0, 5664, 31, 0, 5668, 31, 0, 5668, 31, 0, 5668, 31, 0, 5668, 31, 0, 5668, 31, 0, 5672, 31, 0, 5672, 31, 0, 5672, 31, 0, 5672, 31, 0, 5672, 31, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579944530823084_682_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579944530823084_682_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..05f6029b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579944530823084_682_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,299 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } + break; 
+ } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 12)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || 
(WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((127 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 1)) { + break; + } + } + if ((i1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((260 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1472, 21509, 0, 1472, 21509, 0, 1472, 21509, 0, 1472, 21509, 0, 1472, 21509, 0, 1488, 21509, 0, 1488, 21509, 0, 1488, 21509, 0, 1488, 21509, 0, 1488, 21509, 0, 1504, 21509, 0, 1504, 21509, 0, 1504, 21509, 0, 1504, 21509, 0, 1504, 21509, 0, 3520, 8, 0, 6144, 4096, 0, 8128, 4096, 0, 8132, 4096, 0, 8144, 4096, 0, 8148, 4096, 0, 8160, 4096, 0, 8164, 4096, 0, 16640, 16384, 0, 16656, 16384, 0, 16672, 16384, 0, 17984, 34952, 0, 17984, 34952, 0, 17984, 34952, 0, 17984, 34952, 0, 1472, 21509, 0, 1472, 21509, 0, 1472, 21509, 0, 1472, 21509, 0, 1472, 21509, 0, 1488, 21509, 0, 1488, 21509, 0, 1488, 21509, 0, 1488, 21509, 0, 1488, 21509, 0, 1504, 21509, 0, 1504, 21509, 0, 1504, 21509, 0, 1504, 21509, 0, 1504, 21509, 0, 3520, 8, 0, 6144, 4096, 0, 8128, 4096, 0, 8132, 4096, 0, 8144, 4096, 0, 8148, 4096, 0, 8160, 4096, 0, 8164, 4096, 0, 16640, 16384, 0, 16656, 16384, 0, 16672, 16384, 0, 17984, 34952, 0, 17984, 34952, 0, 17984, 34952, 0, 17984, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579947374532147_684_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579947374532147_684_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f291adf5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579947374532147_684_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,206 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = 
(result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2752, 32768, 0, 3456, 32768, 0, 4032, 1040, 0, 4032, 1040, 0, 6784, 16388, 0, 6784, 16388, 0, 7744, 256, 0, 7760, 256, 0, 8192, 256, 0, 8208, 256, 0, 2752, 32768, 0, 3456, 32768, 0, 4032, 1040, 0, 4032, 1040, 0, 6784, 16388, 0, 6784, 16388, 0, 7744, 256, 0, 7760, 256, 0, 8192, 256, 0, 8208, 256, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579947606726635_685_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579947606726635_685_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..eb24da8d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579947606726635_685_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,80 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 11)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579947682733187_686_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579947682733187_686_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..595ee6d7 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579947682733187_686_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,187 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 3264, 1024, 0, 4224, 16384, 0, 4240, 16384, 0, 8832, 2056, 0, 8832, 2056, 0, 576, 17, 0, 576, 17, 0, 3264, 1024, 0, 4224, 16384, 0, 4240, 16384, 0, 8832, 2056, 0, 8832, 2056, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + 
Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579947944613541_687_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579947944613541_687_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ff9d001a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579947944613541_687_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,276 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 14)) { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((217 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((226 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 32768, 0, 5568, 1040, 0, 5568, 1040, 0, 6592, 16644, 0, 6592, 16644, 0, 6592, 16644, 0, 10816, 85, 0, 10816, 85, 0, 10816, 85, 0, 10816, 85, 0, 768, 32768, 0, 5568, 1040, 0, 5568, 1040, 0, 6592, 16644, 0, 6592, 16644, 0, 6592, 16644, 0, 10816, 85, 0, 10816, 85, 0, 10816, 85, 0, 10816, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756579950379519870_688_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756579950379519870_688_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0490c365 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756579950379519870_688_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,70 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 4)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 15, 0, 1088, 15, 0, 1088, 15, 0, 1088, 15, 0, 1104, 15, 0, 1104, 15, 0, 1104, 15, 0, 1104, 15, 0, 1088, 15, 0, 1088, 15, 0, 1088, 15, 0, 1088, 15, 0, 1104, 15, 0, 1104, 15, 0, 1104, 15, 0, 1104, 15, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580046312101204_690_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580046312101204_690_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e7eb5bd6 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580046312101204_690_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,158 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 12)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # 
Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 5248, 2080, 0, 5248, 2080, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 5248, 2080, 0, 5248, 2080, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580046778693752_691_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580046778693752_691_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..276feaa2 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580046778693752_691_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,375 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((90 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 13)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (331 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (336 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: 
main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5056, 4, 0, 5060, 4, 0, 5064, 4, 0, 5072, 4, 0, 5076, 4, 0, 5080, 4, 0, 5088, 4, 0, 5092, 4, 0, 5096, 4, 0, 7232, 4, 0, 7236, 4, 0, 7240, 4, 0, 7248, 4, 0, 7252, 4, 0, 7256, 4, 0, 7264, 4, 0, 7268, 4, 0, 7272, 4, 0, 9536, 1, 0, 11008, 4097, 0, 11008, 4097, 0, 13184, 10320, 0, 13184, 10320, 0, 13184, 10320, 0, 13184, 10320, 0, 13824, 72, 0, 13824, 72, 0, 15936, 8194, 0, 15936, 8194, 0, 17280, 260, 0, 17280, 260, 0, 17856, 2048, 0, 19328, 33922, 0, 19328, 33922, 0, 19328, 33922, 0, 19328, 33922, 0, 19968, 85, 0, 19968, 85, 0, 19968, 85, 0, 19968, 85, 0, 20608, 8, 0, 21504, 2080, 0, 21504, 2080, 0, 5056, 4, 0, 5060, 4, 0, 5064, 4, 0, 5072, 4, 0, 5076, 4, 0, 5080, 4, 0, 5088, 4, 0, 5092, 4, 0, 5096, 4, 0, 7232, 4, 0, 7236, 4, 0, 7240, 4, 0, 7248, 4, 0, 7252, 4, 0, 7256, 4, 0, 7264, 4, 0, 7268, 4, 0, 7272, 4, 0, 9536, 1, 0, 11008, 4097, 0, 11008, 4097, 0, 13184, 10320, 0, 13184, 10320, 0, 13184, 10320, 0, 13184, 10320, 0, 13824, 72, 0, 13824, 72, 0, 15936, 8194, 0, 15936, 8194, 0, 17280, 260, 0, 17280, 260, 0, 17856, 2048, 0, 19328, 33922, 0, 19328, 33922, 0, 19328, 33922, 0, 19328, 33922, 0, 19968, 85, 0, 19968, 85, 0, 19968, 85, 0, 19968, 85, 0, 20608, 8, 0, 21504, 2080, 0, 21504, 2080, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580056988516269_693_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580056988516269_693_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..052cfa65 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580056988516269_693_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,68 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0, 1088, 57359, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580059093400181_695_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580059093400181_695_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a3297507 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580059093400181_695_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,325 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + 
while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((157 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() 
== 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 10))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (315 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (324 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4368, 16, 0, 4384, 16, 0, 4400, 16, 0, 5072, 16, 0, 5088, 16, 0, 5104, 16, 0, 5376, 16644, 0, 5376, 16644, 0, 5376, 16644, 0, 
12672, 32768, 0, 13312, 73, 0, 13312, 73, 0, 13312, 73, 0, 18624, 32768, 0, 19200, 1040, 0, 19200, 1040, 0, 19520, 18724, 0, 19520, 18724, 0, 19520, 18724, 0, 19520, 18724, 0, 19520, 18724, 0, 20160, 85, 0, 20160, 85, 0, 20160, 85, 0, 20160, 85, 0, 4368, 16, 0, 4384, 16, 0, 4400, 16, 0, 5072, 16, 0, 5088, 16, 0, 5104, 16, 0, 5376, 16644, 0, 5376, 16644, 0, 5376, 16644, 0, 12672, 32768, 0, 13312, 73, 0, 13312, 73, 0, 13312, 73, 0, 18624, 32768, 0, 19200, 1040, 0, 19200, 1040, 0, 19520, 18724, 0, 19520, 18724, 0, 19520, 18724, 0, 19520, 18724, 0, 19520, 18724, 0, 20160, 85, 0, 20160, 85, 0, 20160, 85, 0, 20160, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580061447672440_698_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580061447672440_698_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a6389f2b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580061447672440_698_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,286 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((174 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((181 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 1)) { + continue; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 12)) { + if 
((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((267 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter6 == 1)) { + break; + } + } + } + break; + } + case 2: { + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + for (uint i8 = 0; (i8 < 3); i8 = (i8 + 1)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((292 << 6) | (counter7 << 4)) | (i8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i8 == 1)) { + continue; + } + if ((i8 == 2)) { + break; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 552 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1728, 2, 0, 1744, 2, 0, 5120, 17476, 0, 5120, 17476, 0, 5120, 17476, 0, 5120, 17476, 0, 9552, 4400, 0, 9552, 4400, 0, 9552, 4400, 0, 9552, 4400, 0, 9568, 4400, 0, 9568, 4400, 0, 9568, 4400, 0, 9568, 4400, 0, 10512, 65024, 0, 10512, 65024, 0, 10512, 65024, 0, 10512, 65024, 0, 10512, 65024, 0, 10512, 65024, 0, 10512, 65024, 0, 10516, 65024, 0, 10516, 65024, 0, 10516, 65024, 0, 10516, 65024, 0, 10516, 65024, 0, 10516, 65024, 0, 10516, 65024, 0, 10520, 65024, 0, 10520, 65024, 0, 10520, 65024, 0, 10520, 65024, 0, 10520, 65024, 0, 10520, 65024, 0, 10520, 65024, 0, 10528, 65024, 0, 10528, 65024, 0, 10528, 65024, 0, 10528, 65024, 0, 10528, 65024, 0, 10528, 65024, 0, 10528, 65024, 0, 10532, 65024, 0, 10532, 65024, 0, 10532, 65024, 0, 10532, 65024, 0, 10532, 65024, 0, 10532, 65024, 0, 10532, 65024, 0, 10536, 65024, 0, 10536, 65024, 0, 10536, 65024, 0, 10536, 65024, 0, 10536, 65024, 0, 10536, 65024, 0, 10536, 65024, 0, 11152, 32768, 0, 11156, 32768, 0, 11160, 32768, 0, 11168, 32768, 0, 11172, 32768, 0, 11176, 32768, 0, 15680, 3, 0, 15680, 3, 0, 15696, 3, 0, 15696, 3, 0, 15712, 3, 0, 15712, 3, 0, 18704, 68, 0, 18704, 68, 0, 18708, 68, 0, 18708, 68, 0, 18712, 68, 0, 18712, 68, 0, 18720, 68, 0, 18720, 68, 0, 18724, 68, 0, 18724, 68, 0, 18728, 68, 0, 18728, 68, 0, 18736, 68, 0, 18736, 68, 0, 18740, 68, 0, 18740, 68, 0, 18744, 68, 0, 18744, 68, 0, 19520, 34952, 0, 19520, 34952, 0, 19520, 34952, 0, 19520, 34952, 0, 576, 17, 0, 576, 17, 0, 1728, 2, 0, 
1744, 2, 0, 5120, 17476, 0, 5120, 17476, 0, 5120, 17476, 0, 5120, 17476, 0, 9552, 4400, 0, 9552, 4400, 0, 9552, 4400, 0, 9552, 4400, 0, 9568, 4400, 0, 9568, 4400, 0, 9568, 4400, 0, 9568, 4400, 0, 10512, 65024, 0, 10512, 65024, 0, 10512, 65024, 0, 10512, 65024, 0, 10512, 65024, 0, 10512, 65024, 0, 10512, 65024, 0, 10516, 65024, 0, 10516, 65024, 0, 10516, 65024, 0, 10516, 65024, 0, 10516, 65024, 0, 10516, 65024, 0, 10516, 65024, 0, 10520, 65024, 0, 10520, 65024, 0, 10520, 65024, 0, 10520, 65024, 0, 10520, 65024, 0, 10520, 65024, 0, 10520, 65024, 0, 10528, 65024, 0, 10528, 65024, 0, 10528, 65024, 0, 10528, 65024, 0, 10528, 65024, 0, 10528, 65024, 0, 10528, 65024, 0, 10532, 65024, 0, 10532, 65024, 0, 10532, 65024, 0, 10532, 65024, 0, 10532, 65024, 0, 10532, 65024, 0, 10532, 65024, 0, 10536, 65024, 0, 10536, 65024, 0, 10536, 65024, 0, 10536, 65024, 0, 10536, 65024, 0, 10536, 65024, 0, 10536, 65024, 0, 11152, 32768, 0, 11156, 32768, 0, 11160, 32768, 0, 11168, 32768, 0, 11172, 32768, 0, 11176, 32768, 0, 15680, 3, 0, 15680, 3, 0, 15696, 3, 0, 15696, 3, 0, 15712, 3, 0, 15712, 3, 0, 18704, 68, 0, 18704, 68, 0, 18708, 68, 0, 18708, 68, 0, 18712, 68, 0, 18712, 68, 0, 18720, 68, 0, 18720, 68, 0, 18724, 68, 0, 18724, 68, 0, 18728, 68, 0, 18728, 68, 0, 18736, 68, 0, 18736, 68, 0, 18740, 68, 0, 18740, 68, 0, 18744, 68, 0, 18744, 68, 0, 19520, 34952, 0, 19520, 34952, 0, 19520, 34952, 0, 19520, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580096448533657_699_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580096448533657_699_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b717ff32 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580096448533657_699_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,298 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((222 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((232 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((244 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((251 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((273 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 21, 0, 768, 21, 0, 768, 21, 0, 5952, 16384, 0, 8832, 21504, 0, 8832, 21504, 0, 8832, 21504, 0, 8848, 21504, 0, 8848, 21504, 0, 8848, 21504, 0, 9664, 16384, 0, 9680, 16384, 0, 10368, 16384, 0, 12624, 4680, 0, 12624, 4680, 0, 12624, 4680, 0, 12624, 4680, 0, 12640, 4680, 0, 12640, 4680, 0, 12640, 4680, 0, 12640, 4680, 0, 14224, 2120, 0, 14224, 2120, 0, 14224, 2120, 0, 14228, 2120, 0, 14228, 2120, 0, 14228, 2120, 0, 14240, 2120, 0, 14240, 2120, 0, 14240, 2120, 0, 14244, 2120, 0, 14244, 2120, 0, 14244, 2120, 0, 17488, 4644, 0, 17488, 4644, 0, 17488, 4644, 0, 17488, 4644, 0, 17504, 4644, 0, 17504, 4644, 0, 17504, 4644, 0, 17504, 4644, 0, 768, 21, 0, 768, 21, 0, 768, 21, 0, 5952, 16384, 0, 8832, 21504, 0, 8832, 21504, 0, 
8832, 21504, 0, 8848, 21504, 0, 8848, 21504, 0, 8848, 21504, 0, 9664, 16384, 0, 9680, 16384, 0, 10368, 16384, 0, 12624, 4680, 0, 12624, 4680, 0, 12624, 4680, 0, 12624, 4680, 0, 12640, 4680, 0, 12640, 4680, 0, 12640, 4680, 0, 12640, 4680, 0, 14224, 2120, 0, 14224, 2120, 0, 14224, 2120, 0, 14228, 2120, 0, 14228, 2120, 0, 14228, 2120, 0, 14240, 2120, 0, 14240, 2120, 0, 14240, 2120, 0, 14244, 2120, 0, 14244, 2120, 0, 14244, 2120, 0, 17488, 4644, 0, 17488, 4644, 0, 17488, 4644, 0, 17488, 4644, 0, 17504, 4644, 0, 17504, 4644, 0, 17504, 4644, 0, 17504, 4644, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580129937755636_701_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580129937755636_701_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2d6ef990 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580129937755636_701_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,375 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((230 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 12)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 3: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((296 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((314 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((325 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((336 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (340 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 462 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2192, 8192, 0, 2208, 8192, 0, 2224, 8192, 0, 2896, 2, 0, 2912, 2, 0, 2928, 2, 0, 4368, 16, 0, 4384, 16, 0, 4400, 16, 0, 5760, 18432, 0, 5760, 18432, 0, 5776, 18432, 0, 5776, 18432, 0, 5792, 18432, 0, 5792, 18432, 0, 7424, 36, 0, 7424, 36, 0, 7440, 36, 0, 7440, 36, 0, 7456, 36, 0, 7456, 36, 0, 7872, 36, 0, 7872, 36, 0, 7888, 36, 0, 7888, 36, 0, 7904, 36, 0, 7904, 36, 0, 8320, 18432, 0, 8320, 18432, 0, 8336, 18432, 0, 8336, 18432, 0, 8352, 18432, 0, 8352, 18432, 0, 9984, 32770, 0, 9984, 32770, 0, 10000, 32770, 0, 10000, 32770, 0, 
10016, 32770, 0, 10016, 32770, 0, 11328, 32770, 0, 11328, 32770, 0, 11344, 32770, 0, 11344, 32770, 0, 11360, 32770, 0, 11360, 32770, 0, 16256, 16384, 0, 17792, 16384, 0, 18960, 32776, 0, 18960, 32776, 0, 18976, 32776, 0, 18976, 32776, 0, 18992, 32776, 0, 18992, 32776, 0, 20116, 32768, 0, 20120, 32768, 0, 20124, 32768, 0, 20132, 32768, 0, 20136, 32768, 0, 20140, 32768, 0, 20148, 32768, 0, 20152, 32768, 0, 20156, 32768, 0, 20820, 32768, 0, 20824, 32768, 0, 20828, 32768, 0, 20836, 32768, 0, 20840, 32768, 0, 20844, 32768, 0, 20852, 32768, 0, 20856, 32768, 0, 20860, 32768, 0, 21520, 32768, 0, 21536, 32768, 0, 21552, 32768, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 2192, 8192, 0, 2208, 8192, 0, 2224, 8192, 0, 2896, 2, 0, 2912, 2, 0, 2928, 2, 0, 4368, 16, 0, 4384, 16, 0, 4400, 16, 0, 5760, 18432, 0, 5760, 18432, 0, 5776, 18432, 0, 5776, 18432, 0, 5792, 18432, 0, 5792, 18432, 0, 7424, 36, 0, 7424, 36, 0, 7440, 36, 0, 7440, 36, 0, 7456, 36, 0, 7456, 36, 0, 7872, 36, 0, 7872, 36, 0, 7888, 36, 0, 7888, 36, 0, 7904, 36, 0, 7904, 36, 0, 8320, 18432, 0, 8320, 18432, 0, 8336, 18432, 0, 8336, 18432, 0, 8352, 18432, 0, 8352, 18432, 0, 9984, 32770, 0, 9984, 32770, 0, 10000, 32770, 0, 10000, 32770, 0, 10016, 32770, 0, 10016, 32770, 0, 11328, 32770, 0, 11328, 32770, 0, 11344, 32770, 0, 11344, 32770, 0, 11360, 32770, 0, 11360, 32770, 0, 16256, 16384, 0, 17792, 16384, 0, 18960, 32776, 0, 18960, 32776, 0, 18976, 32776, 0, 18976, 32776, 0, 18992, 32776, 0, 18992, 32776, 0, 20116, 32768, 0, 20120, 32768, 0, 20124, 32768, 0, 20132, 32768, 0, 20136, 32768, 0, 20140, 32768, 0, 20148, 32768, 0, 20152, 32768, 0, 20156, 32768, 0, 20820, 32768, 0, 20824, 32768, 0, 20828, 32768, 0, 20836, 32768, 0, 20840, 32768, 0, 20844, 32768, 0, 20852, 32768, 0, 20856, 32768, 0, 20860, 32768, 0, 21520, 32768, 0, 21536, 32768, 0, 21552, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580198311583385_704_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580198311583385_704_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..76b58a61 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580198311583385_704_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,82 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + 
Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580255017548054_707_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580255017548054_707_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2460a4fa --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580255017548054_707_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,144 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) 
|| (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((counter0 == 1)) { + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 11))) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 
10))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2964, 16400, 0, 2964, 16400, 0, 2968, 16400, 0, 2968, 16400, 0, 4480, 61441, 0, 4480, 61441, 0, 4480, 61441, 0, 4480, 61441, 0, 4480, 61441, 0, 5632, 49153, 0, 5632, 49153, 0, 5632, 49153, 0, 8832, 10912, 0, 8832, 10912, 0, 8832, 10912, 0, 8832, 10912, 0, 8832, 10912, 0, 2964, 16400, 0, 2964, 16400, 0, 2968, 16400, 0, 2968, 16400, 0, 4480, 61441, 0, 4480, 61441, 0, 4480, 61441, 0, 4480, 61441, 0, 4480, 61441, 0, 5632, 49153, 0, 5632, 49153, 0, 5632, 49153, 0, 8832, 10912, 0, 8832, 10912, 0, 8832, 10912, 0, 8832, 10912, 0, 8832, 10912, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580256293498214_708_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580256293498214_708_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5104d4a1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580256293498214_708_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,140 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580265477168829_710_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580265477168829_710_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a4a2da6e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580265477168829_710_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2688, 1, 0, 2688, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580338100087803_714_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580338100087803_714_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a178e8bb --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580338100087803_714_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,310 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 10))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + 
WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 9)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 10))) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((226 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((290 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 2)) { + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (301 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (353 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (349 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (345 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (339 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 312 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3968, 1, 0, 5376, 16384, 0, 11200, 73, 0, 11200, 73, 0, 11200, 73, 0, 13776, 
8192, 0, 13792, 8192, 0, 13808, 8192, 0, 15680, 8192, 0, 16128, 9216, 0, 16128, 9216, 0, 16704, 130, 0, 16704, 130, 0, 17664, 128, 0, 17680, 128, 0, 17696, 128, 0, 18564, 18, 0, 18564, 18, 0, 18568, 18, 0, 18568, 18, 0, 18572, 18, 0, 18572, 18, 0, 18580, 18, 0, 18580, 18, 0, 18584, 18, 0, 18584, 18, 0, 18588, 18, 0, 18588, 18, 0, 18596, 18, 0, 18596, 18, 0, 18600, 18, 0, 18600, 18, 0, 18604, 18, 0, 18604, 18, 0, 19264, 18724, 0, 19264, 18724, 0, 19264, 18724, 0, 19264, 18724, 0, 19264, 18724, 0, 22592, 21845, 0, 22592, 21845, 0, 22592, 21845, 0, 22592, 21845, 0, 22592, 21845, 0, 22592, 21845, 0, 22592, 21845, 0, 22592, 21845, 0, 22336, 130, 0, 22336, 130, 0, 22080, 40968, 0, 22080, 40968, 0, 22080, 40968, 0, 3968, 1, 0, 5376, 16384, 0, 11200, 73, 0, 11200, 73, 0, 11200, 73, 0, 13776, 8192, 0, 13792, 8192, 0, 13808, 8192, 0, 15680, 8192, 0, 16128, 9216, 0, 16128, 9216, 0, 16704, 130, 0, 16704, 130, 0, 17664, 128, 0, 17680, 128, 0, 17696, 128, 0, 18564, 18, 0, 18564, 18, 0, 18568, 18, 0, 18568, 18, 0, 18572, 18, 0, 18572, 18, 0, 18580, 18, 0, 18580, 18, 0, 18584, 18, 0, 18584, 18, 0, 18588, 18, 0, 18588, 18, 0, 18596, 18, 0, 18596, 18, 0, 18600, 18, 0, 18600, 18, 0, 18604, 18, 0, 18604, 18, 0, 19264, 18724, 0, 19264, 18724, 0, 19264, 18724, 0, 19264, 18724, 0, 19264, 18724, 0, 22592, 21845, 0, 22592, 21845, 0, 22592, 21845, 0, 22592, 21845, 0, 22592, 21845, 0, 22592, 21845, 0, 22592, 21845, 0, 22592, 21845, 0, 22336, 130, 0, 22336, 130, 0, 22080, 40968, 0, 22080, 40968, 0, 22080, 40968, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + 
VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580343114846443_715_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580343114846443_715_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5ee623d7 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580343114846443_715_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,421 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } 
+ case 1: { + if ((WaveGetLaneIndex() >= 13)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((186 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((193 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((273 << 6) | (counter3 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (282 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((307 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((320 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((327 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 312 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 3328, 64, 0, 3904, 1024, 0, 4224, 16640, 0, 4224, 16640, 0, 5312, 8, 0, 6080, 8192, 0, 6400, 2080, 0, 6400, 2080, 0, 7040, 73, 0, 7040, 73, 0, 7040, 73, 0, 7616, 1040, 0, 7616, 1040, 0, 7936, 18724, 0, 7936, 18724, 0, 7936, 18724, 0, 7936, 18724, 0, 7936, 18724, 0, 8576, 73, 0, 8576, 73, 0, 8576, 73, 0, 11024, 1026, 0, 11024, 1026, 0, 11040, 1026, 0, 11040, 1026, 0, 11056, 1026, 0, 11056, 1026, 0, 19668, 16388, 0, 19668, 16388, 0, 19672, 16388, 0, 19672, 16388, 0, 19676, 16388, 0, 19676, 16388, 0, 19684, 16388, 0, 19684, 16388, 0, 19688, 16388, 0, 19688, 16388, 0, 19692, 16388, 0, 19692, 16388, 0, 20500, 18432, 0, 20500, 18432, 0, 20504, 18432, 0, 20504, 18432, 0, 20508, 18432, 0, 20508, 18432, 0, 20516, 18432, 0, 20516, 18432, 0, 20520, 18432, 0, 20520, 18432, 0, 20524, 18432, 0, 20524, 18432, 0, 768, 1, 0, 3328, 64, 0, 3904, 1024, 0, 4224, 16640, 0, 4224, 16640, 0, 5312, 8, 0, 6080, 8192, 0, 6400, 2080, 0, 6400, 2080, 0, 7040, 73, 0, 7040, 73, 0, 7040, 73, 0, 7616, 1040, 0, 7616, 1040, 0, 7936, 18724, 0, 7936, 18724, 0, 7936, 18724, 0, 7936, 18724, 0, 7936, 18724, 0, 8576, 73, 0, 8576, 73, 0, 8576, 73, 0, 11024, 1026, 0, 11024, 1026, 0, 11040, 1026, 0, 11040, 1026, 0, 11056, 1026, 0, 11056, 1026, 0, 19668, 16388, 0, 19668, 16388, 0, 19672, 16388, 0, 19672, 16388, 0, 19676, 16388, 0, 19676, 16388, 0, 19684, 16388, 0, 19684, 16388, 0, 19688, 16388, 0, 19688, 16388, 0, 19692, 16388, 0, 19692, 16388, 0, 20500, 18432, 0, 20500, 18432, 0, 20504, 18432, 0, 20504, 18432, 0, 20508, 18432, 0, 20508, 18432, 0, 20516, 18432, 0, 20516, 18432, 0, 20520, 18432, 0, 20520, 18432, 0, 20524, 18432, 0, 20524, 18432, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580367900330488_716_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580367900330488_716_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..81764cef --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580367900330488_716_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,222 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((73 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns 
+ Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2048, 17, 0, 2048, 17, 0, 3008, 34, 0, 3008, 34, 0, 3024, 34, 0, 3024, 34, 0, 3040, 34, 0, 3040, 34, 0, 4992, 17476, 0, 4992, 17476, 0, 4992, 17476, 0, 4992, 17476, 0, 7744, 85, 0, 7744, 85, 0, 7744, 85, 0, 7744, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2048, 17, 0, 2048, 17, 0, 3008, 34, 0, 3008, 34, 0, 3024, 34, 0, 3024, 34, 0, 3040, 34, 0, 3040, 34, 0, 4992, 17476, 0, 4992, 17476, 0, 4992, 17476, 0, 4992, 17476, 0, 7744, 85, 0, 7744, 85, 0, 7744, 85, 0, 7744, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580369124365864_717_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580369124365864_717_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..02020be5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580369124365864_717_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,428 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 8)) { + result 
= (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((130 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + 
if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 
10)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((290 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((316 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((335 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (342 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((359 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() 
== 7) || (WaveGetLaneIndex() == 10))) { + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((385 << 6) | (i6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 2)) { + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((407 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((414 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((428 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + for (uint i9 = 0; (i9 < 2); i9 = (i9 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((458 << 6) | (counter8 << 4)) | (i9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((469 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((479 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((488 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((499 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 432 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 
4369, 0, 1024, 4369, 0, 1024, 4369, 0, 1024, 4369, 0, 1984, 4096, 0, 2000, 4096, 0, 2016, 4096, 0, 2432, 1, 0, 2448, 1, 0, 2464, 1, 0, 7440, 8738, 0, 7440, 8738, 0, 7440, 8738, 0, 7440, 8738, 0, 7456, 8738, 0, 7456, 8738, 0, 7456, 8738, 0, 7456, 8738, 0, 7472, 8738, 0, 7472, 8738, 0, 7472, 8738, 0, 7472, 8738, 0, 8912, 8738, 0, 8912, 8738, 0, 8912, 8738, 0, 8912, 8738, 0, 8928, 8738, 0, 8928, 8738, 0, 8928, 8738, 0, 8928, 8738, 0, 8944, 8738, 0, 8944, 8738, 0, 8944, 8738, 0, 8944, 8738, 0, 14848, 1028, 0, 14848, 1028, 0, 15296, 34952, 0, 15296, 34952, 0, 15296, 34952, 0, 15296, 34952, 0, 15936, 17, 0, 15936, 17, 0, 18560, 514, 0, 18560, 514, 0, 18576, 514, 0, 18576, 514, 0, 21440, 512, 0, 21456, 512, 0, 22976, 68, 0, 22976, 68, 0, 22992, 68, 0, 22992, 68, 0, 24640, 1024, 0, 24644, 1024, 0, 24648, 1024, 0, 24656, 1024, 0, 24660, 1024, 0, 24664, 1024, 0, 26048, 1024, 0, 26064, 1024, 0, 26496, 17408, 0, 26496, 17408, 0, 26512, 17408, 0, 26512, 17408, 0, 30032, 32768, 0, 30048, 32768, 0, 31952, 34824, 0, 31952, 34824, 0, 31952, 34824, 0, 31968, 34824, 0, 31968, 34824, 0, 31968, 34824, 0, 1024, 4369, 0, 1024, 4369, 0, 1024, 4369, 0, 1024, 4369, 0, 1984, 4096, 0, 2000, 4096, 0, 2016, 4096, 0, 2432, 1, 0, 2448, 1, 0, 2464, 1, 0, 7440, 8738, 0, 7440, 8738, 0, 7440, 8738, 0, 7440, 8738, 0, 7456, 8738, 0, 7456, 8738, 0, 7456, 8738, 0, 7456, 8738, 0, 7472, 8738, 0, 7472, 8738, 0, 7472, 8738, 0, 7472, 8738, 0, 8912, 8738, 0, 8912, 8738, 0, 8912, 8738, 0, 8912, 8738, 0, 8928, 8738, 0, 8928, 8738, 0, 8928, 8738, 0, 8928, 8738, 0, 8944, 8738, 0, 8944, 8738, 0, 8944, 8738, 0, 8944, 8738, 0, 14848, 1028, 0, 14848, 1028, 0, 15296, 34952, 0, 15296, 34952, 0, 15296, 34952, 0, 15296, 34952, 0, 15936, 17, 0, 15936, 17, 0, 18560, 514, 0, 18560, 514, 0, 18576, 514, 0, 18576, 514, 0, 21440, 512, 0, 21456, 512, 0, 22976, 68, 0, 22976, 68, 0, 22992, 68, 0, 22992, 68, 0, 24640, 1024, 0, 24644, 1024, 0, 24648, 1024, 0, 24656, 1024, 0, 24660, 1024, 0, 24664, 1024, 0, 26048, 1024, 0, 26064, 
1024, 0, 26496, 17408, 0, 26496, 17408, 0, 26512, 17408, 0, 26512, 17408, 0, 30032, 32768, 0, 30048, 32768, 0, 31952, 34824, 0, 31952, 34824, 0, 31952, 34824, 0, 31968, 34824, 0, 31968, 34824, 0, 31968, 34824, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580382625635947_718_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580382625635947_718_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7c7f069f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580382625635947_718_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,223 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 264 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2496, 63, 0, 2496, 63, 0, 2496, 63, 0, 2496, 63, 0, 2496, 63, 0, 2496, 63, 0, 2112, 63488, 0, 2112, 63488, 0, 2112, 63488, 0, 2112, 63488, 0, 2112, 63488, 0, 1728, 640, 0, 1728, 640, 0, 1472, 1344, 0, 1472, 1344, 0, 1472, 1344, 0, 4352, 36865, 0, 4352, 36865, 0, 4352, 36865, 0, 4368, 36865, 
0, 4368, 36865, 0, 4368, 36865, 0, 4384, 36865, 0, 4384, 36865, 0, 4384, 36865, 0, 5184, 32769, 0, 5184, 32769, 0, 5200, 32769, 0, 5200, 32769, 0, 5216, 32769, 0, 5216, 32769, 0, 5760, 1040, 0, 5760, 1040, 0, 8192, 2052, 0, 8192, 2052, 0, 9280, 16388, 0, 9280, 16388, 0, 9296, 16388, 0, 9296, 16388, 0, 9856, 16388, 0, 9856, 16388, 0, 9872, 16388, 0, 9872, 16388, 0, 11264, 16384, 0, 2496, 63, 0, 2496, 63, 0, 2496, 63, 0, 2496, 63, 0, 2496, 63, 0, 2496, 63, 0, 2112, 63488, 0, 2112, 63488, 0, 2112, 63488, 0, 2112, 63488, 0, 2112, 63488, 0, 1728, 640, 0, 1728, 640, 0, 1472, 1344, 0, 1472, 1344, 0, 1472, 1344, 0, 4352, 36865, 0, 4352, 36865, 0, 4352, 36865, 0, 4368, 36865, 0, 4368, 36865, 0, 4368, 36865, 0, 4384, 36865, 0, 4384, 36865, 0, 4384, 36865, 0, 5184, 32769, 0, 5184, 32769, 0, 5200, 32769, 0, 5200, 32769, 0, 5216, 32769, 0, 5216, 32769, 0, 5760, 1040, 0, 5760, 1040, 0, 8192, 2052, 0, 8192, 2052, 0, 9280, 16388, 0, 9280, 16388, 0, 9296, 16388, 0, 9296, 16388, 0, 9856, 16388, 0, 9856, 16388, 0, 9872, 16388, 0, 9872, 16388, 0, 11264, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580389717016500_720_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580389717016500_720_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0a1bdb22 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580389717016500_720_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,129 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 9)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 9))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 5312, 17476, 0, 5312, 17476, 0, 5312, 17476, 0, 5312, 17476, 0, 5760, 34952, 0, 5760, 34952, 0, 5760, 34952, 0, 5760, 34952, 0, 576, 17, 0, 576, 17, 0, 5312, 17476, 0, 5312, 17476, 0, 5312, 17476, 0, 5312, 17476, 0, 5760, 34952, 0, 5760, 34952, 0, 5760, 34952, 0, 5760, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580389867795034_721_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580389867795034_721_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ca3b16ee --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580389867795034_721_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,198 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 4)) { + result = 
(result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((147 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 5))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((185 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 
threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 636 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2448, 4369, 0, 2448, 4369, 0, 2448, 4369, 0, 2448, 4369, 0, 2452, 4369, 0, 2452, 4369, 0, 2452, 4369, 0, 2452, 4369, 0, 2456, 4369, 0, 2456, 4369, 0, 2456, 4369, 0, 2456, 4369, 0, 2464, 4369, 0, 2464, 4369, 0, 2464, 4369, 0, 2464, 4369, 0, 2468, 4369, 0, 2468, 4369, 0, 2468, 4369, 0, 2468, 4369, 0, 2472, 4369, 0, 2472, 4369, 0, 2472, 4369, 0, 2472, 4369, 0, 2480, 4369, 0, 2480, 4369, 0, 2480, 4369, 0, 2480, 4369, 0, 2484, 4369, 0, 2484, 4369, 0, 2484, 4369, 0, 2484, 4369, 0, 2488, 4369, 0, 2488, 4369, 0, 2488, 4369, 0, 2488, 4369, 0, 3024, 4369, 0, 3024, 4369, 0, 3024, 4369, 0, 3024, 4369, 0, 3040, 4369, 0, 3040, 4369, 0, 3040, 4369, 0, 3040, 4369, 0, 3056, 4369, 0, 3056, 4369, 0, 3056, 4369, 0, 3056, 4369, 0, 3904, 17476, 0, 3904, 17476, 0, 3904, 17476, 0, 3904, 17476, 0, 4352, 34952, 0, 4352, 34952, 0, 4352, 34952, 0, 4352, 34952, 0, 5248, 43690, 0, 5248, 43690, 0, 5248, 43690, 0, 5248, 43690, 0, 5248, 43690, 0, 5248, 43690, 0, 5248, 43690, 0, 5248, 43690, 0, 5824, 43690, 0, 5824, 43690, 0, 5824, 43690, 0, 5824, 43690, 0, 5824, 43690, 0, 5824, 43690, 0, 5824, 43690, 0, 5824, 43690, 0, 6272, 5, 0, 6272, 5, 0, 9424, 21525, 0, 9424, 21525, 0, 9424, 21525, 0, 9424, 21525, 0, 9424, 21525, 0, 9424, 21525, 0, 9428, 21525, 0, 9428, 21525, 0, 9428, 21525, 0, 9428, 21525, 0, 9428, 21525, 0, 9428, 21525, 0, 9440, 21525, 0, 9440, 21525, 0, 9440, 21525, 0, 9440, 21525, 0, 9440, 21525, 0, 9440, 21525, 0, 9444, 21525, 0, 9444, 21525, 0, 9444, 21525, 0, 9444, 21525, 0, 9444, 21525, 0, 9444, 21525, 0, 11856, 68, 0, 11856, 68, 0, 11860, 68, 0, 11860, 68, 0, 11872, 68, 0, 11872, 68, 0, 11876, 68, 0, 11876, 68, 0, 2448, 4369, 0, 2448, 4369, 0, 2448, 4369, 0, 2448, 4369, 0, 2452, 4369, 0, 2452, 4369, 0, 2452, 4369, 0, 2452, 4369, 0, 2456, 4369, 0, 2456, 4369, 0, 2456, 4369, 0, 2456, 4369, 0, 2464, 4369, 0, 2464, 4369, 0, 
2464, 4369, 0, 2464, 4369, 0, 2468, 4369, 0, 2468, 4369, 0, 2468, 4369, 0, 2468, 4369, 0, 2472, 4369, 0, 2472, 4369, 0, 2472, 4369, 0, 2472, 4369, 0, 2480, 4369, 0, 2480, 4369, 0, 2480, 4369, 0, 2480, 4369, 0, 2484, 4369, 0, 2484, 4369, 0, 2484, 4369, 0, 2484, 4369, 0, 2488, 4369, 0, 2488, 4369, 0, 2488, 4369, 0, 2488, 4369, 0, 3024, 4369, 0, 3024, 4369, 0, 3024, 4369, 0, 3024, 4369, 0, 3040, 4369, 0, 3040, 4369, 0, 3040, 4369, 0, 3040, 4369, 0, 3056, 4369, 0, 3056, 4369, 0, 3056, 4369, 0, 3056, 4369, 0, 3904, 17476, 0, 3904, 17476, 0, 3904, 17476, 0, 3904, 17476, 0, 4352, 34952, 0, 4352, 34952, 0, 4352, 34952, 0, 4352, 34952, 0, 5248, 43690, 0, 5248, 43690, 0, 5248, 43690, 0, 5248, 43690, 0, 5248, 43690, 0, 5248, 43690, 0, 5248, 43690, 0, 5248, 43690, 0, 5824, 43690, 0, 5824, 43690, 0, 5824, 43690, 0, 5824, 43690, 0, 5824, 43690, 0, 5824, 43690, 0, 5824, 43690, 0, 5824, 43690, 0, 6272, 5, 0, 6272, 5, 0, 9424, 21525, 0, 9424, 21525, 0, 9424, 21525, 0, 9424, 21525, 0, 9424, 21525, 0, 9424, 21525, 0, 9428, 21525, 0, 9428, 21525, 0, 9428, 21525, 0, 9428, 21525, 0, 9428, 21525, 0, 9428, 21525, 0, 9440, 21525, 0, 9440, 21525, 0, 9440, 21525, 0, 9440, 21525, 0, 9440, 21525, 0, 9440, 21525, 0, 9444, 21525, 0, 9444, 21525, 0, 9444, 21525, 0, 9444, 21525, 0, 9444, 21525, 0, 9444, 21525, 0, 11856, 68, 0, 11856, 68, 0, 11860, 68, 0, 11860, 68, 0, 11872, 68, 0, 11872, 68, 0, 11876, 68, 0, 11876, 68, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580393779639704_722_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580393779639704_722_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..193baab7 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580393779639704_722_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,169 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 2))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() 
== 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 73, 0, 768, 73, 0, 768, 73, 0, 5760, 1040, 0, 5760, 1040, 0, 6912, 16388, 0, 6912, 16388, 0, 8960, 4, 0, 768, 73, 0, 768, 73, 0, 768, 73, 0, 5760, 1040, 0, 5760, 1040, 0, 6912, 16388, 0, 6912, 16388, 0, 8960, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580446138498189_726_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580446138498189_726_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fbf97778 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580446138498189_726_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,402 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 12)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + 
break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 4))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 
12)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((217 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((268 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((277 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((284 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 8))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (333 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (342 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((369 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (387 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (402 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + 
} +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [704, 3, 0, 704, 3, 0, 1344, 1, 0, 6144, 1040, 0, 6144, 1040, 0, 6464, 18724, 0, 6464, 18724, 0, 6464, 18724, 0, 6464, 18724, 0, 6464, 18724, 0, 7104, 17, 0, 7104, 17, 0, 8064, 2, 0, 8080, 2, 0, 13888, 8738, 0, 13888, 8738, 0, 13888, 8738, 0, 13888, 8738, 0, 13904, 8738, 0, 13904, 8738, 0, 13904, 8738, 0, 13904, 8738, 0, 17728, 8738, 0, 17728, 8738, 0, 17728, 8738, 0, 17728, 8738, 0, 17744, 8738, 0, 17744, 8738, 0, 17744, 8738, 0, 17744, 8738, 0, 18176, 8704, 0, 18176, 8704, 0, 18192, 8704, 0, 18192, 8704, 0, 18496, 17476, 0, 18496, 17476, 0, 18496, 17476, 0, 18496, 17476, 0, 25728, 128, 0, 704, 3, 0, 704, 3, 0, 1344, 1, 0, 6144, 1040, 0, 6144, 1040, 0, 6464, 18724, 0, 6464, 18724, 0, 6464, 18724, 0, 6464, 18724, 0, 6464, 18724, 0, 7104, 17, 0, 7104, 17, 0, 8064, 2, 0, 8080, 2, 0, 13888, 8738, 0, 13888, 8738, 0, 13888, 8738, 0, 13888, 8738, 0, 13904, 8738, 0, 13904, 8738, 0, 13904, 8738, 0, 13904, 8738, 0, 17728, 8738, 0, 17728, 8738, 0, 17728, 8738, 0, 17728, 8738, 0, 17744, 8738, 0, 17744, 8738, 0, 17744, 8738, 0, 17744, 8738, 0, 18176, 8704, 0, 18176, 8704, 0, 18192, 8704, 0, 18192, 8704, 0, 18496, 17476, 0, 18496, 17476, 0, 18496, 17476, 0, 18496, 17476, 0, 25728, 128, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580474176985866_729_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580474176985866_729_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..05229007 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580474176985866_729_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,94 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + 
Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 85, 0, 1216, 85, 0, 1216, 85, 0, 1216, 85, 0, 1216, 85, 0, 1216, 85, 0, 1216, 85, 0, 1216, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580474287053340_730_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580474287053340_730_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f3a446b3 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580474287053340_730_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,313 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((125 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((191 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((262 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((273 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((284 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((294 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((303 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((308 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((319 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } 
+} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 282 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1728, 16389, 0, 1728, 16389, 0, 1728, 16389, 0, 2432, 16385, 0, 2432, 16385, 0, 3648, 520, 0, 3648, 520, 0, 3664, 520, 0, 3664, 520, 0, 4928, 32770, 0, 4928, 32770, 0, 4944, 32770, 0, 4944, 32770, 0, 6400, 2048, 0, 6416, 2048, 0, 8000, 512, 0, 8016, 512, 0, 8704, 128, 0, 8720, 128, 0, 10752, 32777, 0, 10752, 32777, 0, 10752, 32777, 0, 12224, 8, 0, 12240, 8, 0, 13440, 8, 0, 13456, 8, 0, 14912, 5201, 0, 14912, 5201, 0, 14912, 5201, 0, 14912, 5201, 0, 14912, 5201, 0, 18176, 16388, 0, 18176, 16388, 0, 18192, 16388, 0, 18192, 16388, 0, 19712, 18724, 0, 19712, 18724, 0, 19712, 18724, 0, 19712, 18724, 0, 19712, 18724, 0, 19728, 18724, 0, 19728, 18724, 0, 19728, 18724, 0, 19728, 18724, 0, 19728, 18724, 0, 20416, 16384, 0, 20432, 16384, 0, 1728, 16389, 0, 1728, 16389, 0, 1728, 16389, 0, 2432, 16385, 0, 2432, 16385, 0, 3648, 520, 0, 3648, 520, 0, 3664, 520, 0, 3664, 520, 0, 4928, 32770, 0, 4928, 32770, 0, 4944, 32770, 0, 4944, 32770, 0, 6400, 2048, 0, 6416, 2048, 0, 8000, 512, 0, 8016, 512, 0, 8704, 128, 0, 8720, 128, 0, 10752, 32777, 0, 10752, 32777, 0, 10752, 32777, 0, 12224, 8, 0, 12240, 8, 0, 13440, 8, 0, 13456, 8, 0, 14912, 5201, 0, 14912, 5201, 0, 14912, 5201, 0, 14912, 5201, 0, 14912, 5201, 0, 18176, 16388, 0, 18176, 16388, 0, 18192, 16388, 0, 18192, 16388, 0, 19712, 18724, 0, 19712, 18724, 0, 19712, 18724, 0, 19712, 18724, 0, 19712, 18724, 0, 19728, 18724, 0, 19728, 18724, 0, 19728, 18724, 0, 19728, 18724, 0, 19728, 18724, 0, 20416, 16384, 0, 20432, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580482369564093_731_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580482369564093_731_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d016acef --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580482369564093_731_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,367 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5008, 18, 0, 5008, 18, 0, 5024, 18, 0, 5024, 18, 0, 5040, 18, 0, 5040, 18, 0, 7232, 18724, 0, 7232, 18724, 0, 7232, 18724, 0, 7232, 18724, 0, 7232, 18724, 0, 7872, 17, 0, 7872, 17, 0, 12480, 17476, 0, 12480, 17476, 0, 12480, 17476, 0, 12480, 17476, 0, 12928, 34952, 0, 12928, 34952, 0, 12928, 34952, 0, 12928, 34952, 0, 14016, 73, 0, 14016, 73, 0, 14016, 73, 0, 16512, 1040, 0, 16512, 1040, 0, 16832, 18724, 0, 16832, 18724, 0, 16832, 18724, 0, 16832, 18724, 0, 16832, 18724, 0, 5008, 18, 0, 5008, 18, 0, 5024, 18, 0, 5024, 18, 0, 5040, 18, 0, 5040, 18, 0, 7232, 18724, 0, 7232, 18724, 0, 7232, 18724, 0, 7232, 18724, 0, 7232, 18724, 0, 7872, 17, 0, 7872, 17, 0, 12480, 17476, 0, 12480, 17476, 0, 12480, 17476, 0, 12480, 17476, 0, 12928, 34952, 0, 12928, 34952, 0, 12928, 34952, 0, 12928, 34952, 0, 14016, 73, 0, 14016, 73, 0, 14016, 73, 0, 16512, 1040, 0, 16512, 1040, 0, 16832, 18724, 0, 16832, 18724, 0, 16832, 18724, 0, 16832, 18724, 0, 16832, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580487806637844_733_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580487806637844_733_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f10befbb --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580487806637844_733_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,361 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13)) || 
(WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if 
(((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((273 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((292 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((326 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((341 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 3584, 17476, 0, 3584, 17476, 0, 3584, 17476, 0, 3584, 17476, 0, 4224, 8, 0, 11456, 1, 0, 13440, 256, 0, 18688, 16384, 0, 18704, 16384, 0, 20868, 68, 0, 20868, 68, 0, 20884, 68, 0, 20884, 68, 0, 20900, 68, 0, 20900, 68, 0, 576, 17, 0, 576, 17, 0, 3584, 17476, 0, 3584, 17476, 0, 3584, 17476, 0, 3584, 17476, 0, 4224, 8, 0, 11456, 1, 0, 13440, 256, 0, 18688, 16384, 0, 18704, 16384, 0, 20868, 68, 0, 20868, 68, 0, 20884, 68, 0, 20884, 68, 0, 20900, 68, 0, 20900, 68, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580517927527561_735_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580517927527561_735_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e998e571 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580517927527561_735_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,232 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() >= 10)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3328, 57347, 0, 3328, 57347, 0, 3328, 57347, 0, 3328, 57347, 0, 3328, 57347, 0, 3968, 1, 0, 4864, 17476, 0, 4864, 17476, 0, 4864, 17476, 0, 4864, 17476, 0, 8192, 2, 0, 9280, 8194, 0, 9280, 8194, 0, 9296, 8194, 0, 9296, 8194, 0, 9312, 8194, 0, 9312, 8194, 0, 12032, 272, 0, 12032, 272, 0, 3328, 57347, 0, 3328, 57347, 0, 3328, 57347, 0, 3328, 57347, 0, 3328, 57347, 0, 3968, 1, 0, 4864, 17476, 0, 4864, 17476, 0, 4864, 17476, 0, 4864, 17476, 0, 8192, 2, 0, 9280, 8194, 0, 9280, 8194, 0, 9296, 8194, 0, 9296, 8194, 0, 9312, 8194, 0, 9312, 8194, 0, 12032, 272, 0, 12032, 272, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580518468631920_736_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580518468631920_736_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ee6df6b1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580518468631920_736_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,223 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-uint records: tag word, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] = next free slot in _participant_bit; bumped by 3 per record */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((121 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((130 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 7)) { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 612 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1664, 41610, 0, 1664, 41610, 0, 1664, 41610, 0, 1664, 41610, 0, 1664, 41610, 0, 1664, 41610, 0, 1680, 41610, 0, 1680, 41610, 0, 1680, 41610, 0, 1680, 41610, 0, 1680, 41610, 0, 1680, 41610, 0, 1696, 41610, 0, 1696, 41610, 0, 1696, 41610, 0, 1696, 41610, 0, 1696, 41610, 0, 1696, 41610, 0, 3584, 1088, 0, 3584, 1088, 0, 3600, 1088, 0, 3600, 1088, 0, 3616, 1088, 0, 3616, 1088, 0, 4160, 41610, 0, 4160, 41610, 0, 4160, 41610, 0, 4160, 41610, 0, 4160, 41610, 0, 4160, 41610, 0, 4176, 41610, 0, 4176, 41610, 0, 4176, 41610, 0, 4176, 41610, 0, 4176, 41610, 0, 4176, 41610, 0, 4192, 41610, 0, 4192, 41610, 0, 4192, 41610, 0, 4192, 41610, 0, 4192, 41610, 0, 4192, 41610, 0, 4480, 18724, 0, 4480, 18724, 0, 4480, 18724, 0, 4480, 18724, 0, 4480, 18724, 0, 6656, 4096, 0, 6672, 4096, 0, 7744, 4161, 0, 7744, 4161, 0, 7744, 4161, 0, 7748, 4161, 0, 7748, 4161, 0, 7748, 4161, 0, 7752, 4161, 0, 7752, 4161, 0, 7752, 4161, 0, 7760, 4161, 0, 7760, 4161, 0, 7760, 4161, 0, 7764, 4161, 0, 7764, 4161, 0, 7764, 4161, 0, 7768, 4161, 0, 7768, 4161, 0, 7768, 4161, 0, 8320, 4161, 0, 8320, 4161, 0, 8320, 4161, 0, 8324, 4161, 0, 8324, 4161, 0, 8324, 4161, 0, 8328, 4161, 0, 8328, 4161, 0, 8328, 4161, 0, 8336, 4161, 0, 8336, 4161, 0, 
8336, 4161, 0, 8340, 4161, 0, 8340, 4161, 0, 8340, 4161, 0, 8344, 4161, 0, 8344, 4161, 0, 8344, 4161, 0, 9536, 64, 0, 9552, 64, 0, 11008, 16, 0, 11024, 16, 0, 14144, 28086, 0, 14144, 28086, 0, 14144, 28086, 0, 14144, 28086, 0, 14144, 28086, 0, 14144, 28086, 0, 14144, 28086, 0, 14144, 28086, 0, 14144, 28086, 0, 14144, 28086, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1664, 41610, 0, 1664, 41610, 0, 1664, 41610, 0, 1664, 41610, 0, 1664, 41610, 0, 1664, 41610, 0, 1680, 41610, 0, 1680, 41610, 0, 1680, 41610, 0, 1680, 41610, 0, 1680, 41610, 0, 1680, 41610, 0, 1696, 41610, 0, 1696, 41610, 0, 1696, 41610, 0, 1696, 41610, 0, 1696, 41610, 0, 1696, 41610, 0, 3584, 1088, 0, 3584, 1088, 0, 3600, 1088, 0, 3600, 1088, 0, 3616, 1088, 0, 3616, 1088, 0, 4160, 41610, 0, 4160, 41610, 0, 4160, 41610, 0, 4160, 41610, 0, 4160, 41610, 0, 4160, 41610, 0, 4176, 41610, 0, 4176, 41610, 0, 4176, 41610, 0, 4176, 41610, 0, 4176, 41610, 0, 4176, 41610, 0, 4192, 41610, 0, 4192, 41610, 0, 4192, 41610, 0, 4192, 41610, 0, 4192, 41610, 0, 4192, 41610, 0, 4480, 18724, 0, 4480, 18724, 0, 4480, 18724, 0, 4480, 18724, 0, 4480, 18724, 0, 6656, 4096, 0, 6672, 4096, 0, 7744, 4161, 0, 7744, 4161, 0, 7744, 4161, 0, 7748, 4161, 0, 7748, 4161, 0, 7748, 4161, 0, 7752, 4161, 0, 7752, 4161, 0, 7752, 4161, 0, 7760, 4161, 0, 7760, 4161, 0, 7760, 4161, 0, 7764, 4161, 0, 7764, 4161, 0, 7764, 4161, 0, 7768, 4161, 0, 7768, 4161, 0, 7768, 4161, 0, 8320, 4161, 0, 8320, 4161, 0, 8320, 4161, 0, 8324, 4161, 0, 8324, 4161, 0, 8324, 4161, 0, 8328, 4161, 0, 8328, 4161, 0, 8328, 4161, 0, 8336, 4161, 0, 8336, 4161, 0, 8336, 4161, 0, 8340, 4161, 0, 8340, 4161, 0, 8340, 4161, 0, 8344, 4161, 0, 8344, 4161, 0, 8344, 4161, 0, 9536, 64, 0, 9552, 64, 0, 11008, 16, 0, 11024, 16, 0, 14144, 28086, 0, 14144, 28086, 0, 14144, 28086, 0, 14144, 28086, 0, 14144, 28086, 0, 14144, 28086, 0, 14144, 28086, 0, 14144, 28086, 0, 14144, 28086, 0, 14144, 28086, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580526780122299_737_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580526780122299_737_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ddb392ab --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580526780122299_737_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,137 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-uint records: tag word, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] = next free slot in _participant_bit; bumped by 3 per record */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + 
if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch 
for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 276 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 61443, 0, 1088, 61443, 0, 1088, 61443, 0, 1088, 61443, 0, 1088, 61443, 0, 1088, 61443, 0, 1792, 32771, 0, 1792, 32771, 0, 1792, 32771, 0, 2432, 73, 0, 2432, 73, 0, 2432, 73, 0, 4160, 8193, 0, 4160, 8193, 0, 4176, 8193, 0, 4176, 8193, 0, 4192, 8193, 0, 4192, 8193, 0, 5060, 1, 0, 5064, 1, 0, 5076, 1, 0, 5080, 1, 0, 5092, 1, 0, 5096, 1, 0, 5508, 512, 0, 5512, 512, 0, 5524, 512, 0, 5528, 512, 0, 5540, 512, 0, 5544, 512, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 1088, 61443, 0, 1088, 61443, 0, 1088, 61443, 0, 1088, 61443, 0, 1088, 61443, 0, 1088, 61443, 0, 1792, 32771, 0, 1792, 32771, 0, 1792, 32771, 0, 2432, 73, 0, 2432, 73, 0, 2432, 73, 0, 4160, 8193, 0, 4160, 8193, 0, 4176, 8193, 0, 4176, 8193, 0, 4192, 8193, 0, 4192, 8193, 0, 5060, 1, 0, 5064, 1, 0, 5076, 1, 0, 5080, 1, 0, 5092, 1, 0, 5096, 1, 0, 5508, 512, 0, 5512, 512, 0, 5524, 512, 0, 5528, 512, 0, 5540, 512, 0, 5544, 512, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0, 5824, 65535, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580529583756046_738_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580529583756046_738_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..01f27c58 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580529583756046_738_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,304 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-uint records: tag word, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] = next free slot in _participant_bit; bumped by 3 per record */ + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((28 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (counter0 << 4)) 
| (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 11)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 4))) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + } + } else { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((204 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((237 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((253 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i5 == 1)) { + continue; + } + } + break; + } + case 2: { + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((297 << 6) | (i7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((304 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
} + break; + } + case 3: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (324 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter9 = 0; + while ((counter9 < 3)) { + counter9 = (counter9 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((340 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 840 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1808, 21509, 0, 1808, 21509, 0, 1808, 21509, 0, 1808, 21509, 0, 1808, 21509, 0, 1812, 21509, 0, 1812, 21509, 0, 1812, 21509, 0, 1812, 21509, 0, 1812, 21509, 0, 1824, 21509, 0, 1824, 21509, 0, 1824, 21509, 0, 1824, 21509, 0, 1824, 21509, 0, 1828, 21509, 0, 1828, 21509, 0, 1828, 21509, 0, 1828, 21509, 0, 1828, 21509, 0, 1840, 21509, 0, 1840, 21509, 0, 1840, 21509, 0, 1840, 21509, 0, 1840, 21509, 0, 1844, 21509, 0, 1844, 21509, 0, 1844, 21509, 0, 1844, 21509, 0, 1844, 
21509, 0, 2704, 21845, 0, 2704, 21845, 0, 2704, 21845, 0, 2704, 21845, 0, 2704, 21845, 0, 2704, 21845, 0, 2704, 21845, 0, 2704, 21845, 0, 2708, 21845, 0, 2708, 21845, 0, 2708, 21845, 0, 2708, 21845, 0, 2708, 21845, 0, 2708, 21845, 0, 2708, 21845, 0, 2708, 21845, 0, 2720, 21845, 0, 2720, 21845, 0, 2720, 21845, 0, 2720, 21845, 0, 2720, 21845, 0, 2720, 21845, 0, 2720, 21845, 0, 2720, 21845, 0, 2724, 21845, 0, 2724, 21845, 0, 2724, 21845, 0, 2724, 21845, 0, 2724, 21845, 0, 2724, 21845, 0, 2724, 21845, 0, 2724, 21845, 0, 2736, 21845, 0, 2736, 21845, 0, 2736, 21845, 0, 2736, 21845, 0, 2736, 21845, 0, 2736, 21845, 0, 2736, 21845, 0, 2736, 21845, 0, 2740, 21845, 0, 2740, 21845, 0, 2740, 21845, 0, 2740, 21845, 0, 2740, 21845, 0, 2740, 21845, 0, 2740, 21845, 0, 2740, 21845, 0, 3408, 16385, 0, 3408, 16385, 0, 3412, 16385, 0, 3412, 16385, 0, 3424, 16385, 0, 3424, 16385, 0, 3428, 16385, 0, 3428, 16385, 0, 3440, 16385, 0, 3440, 16385, 0, 3444, 16385, 0, 3444, 16385, 0, 13072, 17, 0, 13072, 17, 0, 13076, 17, 0, 13076, 17, 0, 13080, 17, 0, 13080, 17, 0, 13088, 17, 0, 13088, 17, 0, 13092, 17, 0, 13092, 17, 0, 13096, 17, 0, 13096, 17, 0, 13104, 17, 0, 13104, 17, 0, 13108, 17, 0, 13108, 17, 0, 13112, 17, 0, 13112, 17, 0, 14224, 1, 0, 14240, 1, 0, 14256, 1, 0, 15168, 8192, 0, 15184, 8192, 0, 15200, 8192, 0, 16196, 8738, 0, 16196, 8738, 0, 16196, 8738, 0, 16196, 8738, 0, 16200, 8738, 0, 16200, 8738, 0, 16200, 8738, 0, 16200, 8738, 0, 16212, 8738, 0, 16212, 8738, 0, 16212, 8738, 0, 16212, 8738, 0, 16216, 8738, 0, 16216, 8738, 0, 16216, 8738, 0, 16216, 8738, 0, 16228, 8738, 0, 16228, 8738, 0, 16228, 8738, 0, 16228, 8738, 0, 16232, 8738, 0, 16232, 8738, 0, 16232, 8738, 0, 16232, 8738, 0, 20736, 32768, 0, 22464, 32768, 0, 1808, 21509, 0, 1808, 21509, 0, 1808, 21509, 0, 1808, 21509, 0, 1808, 21509, 0, 1812, 21509, 0, 1812, 21509, 0, 1812, 21509, 0, 1812, 21509, 0, 1812, 21509, 0, 1824, 21509, 0, 1824, 21509, 0, 1824, 21509, 0, 1824, 21509, 0, 1824, 21509, 0, 1828, 21509, 0, 1828, 21509, 0, 
1828, 21509, 0, 1828, 21509, 0, 1828, 21509, 0, 1840, 21509, 0, 1840, 21509, 0, 1840, 21509, 0, 1840, 21509, 0, 1840, 21509, 0, 1844, 21509, 0, 1844, 21509, 0, 1844, 21509, 0, 1844, 21509, 0, 1844, 21509, 0, 2704, 21845, 0, 2704, 21845, 0, 2704, 21845, 0, 2704, 21845, 0, 2704, 21845, 0, 2704, 21845, 0, 2704, 21845, 0, 2704, 21845, 0, 2708, 21845, 0, 2708, 21845, 0, 2708, 21845, 0, 2708, 21845, 0, 2708, 21845, 0, 2708, 21845, 0, 2708, 21845, 0, 2708, 21845, 0, 2720, 21845, 0, 2720, 21845, 0, 2720, 21845, 0, 2720, 21845, 0, 2720, 21845, 0, 2720, 21845, 0, 2720, 21845, 0, 2720, 21845, 0, 2724, 21845, 0, 2724, 21845, 0, 2724, 21845, 0, 2724, 21845, 0, 2724, 21845, 0, 2724, 21845, 0, 2724, 21845, 0, 2724, 21845, 0, 2736, 21845, 0, 2736, 21845, 0, 2736, 21845, 0, 2736, 21845, 0, 2736, 21845, 0, 2736, 21845, 0, 2736, 21845, 0, 2736, 21845, 0, 2740, 21845, 0, 2740, 21845, 0, 2740, 21845, 0, 2740, 21845, 0, 2740, 21845, 0, 2740, 21845, 0, 2740, 21845, 0, 2740, 21845, 0, 3408, 16385, 0, 3408, 16385, 0, 3412, 16385, 0, 3412, 16385, 0, 3424, 16385, 0, 3424, 16385, 0, 3428, 16385, 0, 3428, 16385, 0, 3440, 16385, 0, 3440, 16385, 0, 3444, 16385, 0, 3444, 16385, 0, 13072, 17, 0, 13072, 17, 0, 13076, 17, 0, 13076, 17, 0, 13080, 17, 0, 13080, 17, 0, 13088, 17, 0, 13088, 17, 0, 13092, 17, 0, 13092, 17, 0, 13096, 17, 0, 13096, 17, 0, 13104, 17, 0, 13104, 17, 0, 13108, 17, 0, 13108, 17, 0, 13112, 17, 0, 13112, 17, 0, 14224, 1, 0, 14240, 1, 0, 14256, 1, 0, 15168, 8192, 0, 15184, 8192, 0, 15200, 8192, 0, 16196, 8738, 0, 16196, 8738, 0, 16196, 8738, 0, 16196, 8738, 0, 16200, 8738, 0, 16200, 8738, 0, 16200, 8738, 0, 16200, 8738, 0, 16212, 8738, 0, 16212, 8738, 0, 16212, 8738, 0, 16212, 8738, 0, 16216, 8738, 0, 16216, 8738, 0, 16216, 8738, 0, 16216, 8738, 0, 16228, 8738, 0, 16228, 8738, 0, 16228, 8738, 0, 16228, 8738, 0, 16232, 8738, 0, 16232, 8738, 0, 16232, 8738, 0, 16232, 8738, 0, 20736, 32768, 0, 22464, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] 
+Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580575904433064_740_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580575904433064_740_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7a8c279f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580575904433064_740_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,240 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 
1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((148 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 4048, 16384, 0, 4064, 16384, 0, 5584, 16384, 0, 5600, 16384, 0, 9040, 4, 0, 9056, 4, 0, 9488, 2048, 0, 9504, 2048, 0, 10320, 4, 0, 10336, 4, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 4048, 16384, 0, 4064, 16384, 0, 5584, 16384, 0, 5600, 16384, 0, 9040, 4, 0, 9056, 4, 0, 9488, 2048, 0, 9504, 2048, 0, 10320, 4, 0, 10336, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index 
+ Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580576325638387_741_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580576325638387_741_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a510e593 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580576325638387_741_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,390 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((279 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 9)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((324 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((331 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((358 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((371 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (380 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4288, 16, 0, 4608, 256, 0, 5760, 2, 0, 7936, 544, 0, 7936, 544, 0, 10176, 17476, 0, 10176, 17476, 0, 10176, 17476, 0, 10176, 17476, 
0, 11088, 8, 0, 11104, 8, 0, 13200, 32776, 0, 13200, 32776, 0, 13216, 32776, 0, 13216, 32776, 0, 14864, 49152, 0, 14864, 49152, 0, 14880, 49152, 0, 14880, 49152, 0, 14896, 49152, 0, 14896, 49152, 0, 15312, 63488, 0, 15312, 63488, 0, 15312, 63488, 0, 15312, 63488, 0, 15312, 63488, 0, 15328, 63488, 0, 15328, 63488, 0, 15328, 63488, 0, 15328, 63488, 0, 15328, 63488, 0, 15344, 63488, 0, 15344, 63488, 0, 15344, 63488, 0, 15344, 63488, 0, 15344, 63488, 0, 16128, 85, 0, 16128, 85, 0, 16128, 85, 0, 16128, 85, 0, 18496, 32768, 0, 19776, 32768, 0, 24320, 32768, 0, 4288, 16, 0, 4608, 256, 0, 5760, 2, 0, 7936, 544, 0, 7936, 544, 0, 10176, 17476, 0, 10176, 17476, 0, 10176, 17476, 0, 10176, 17476, 0, 11088, 8, 0, 11104, 8, 0, 13200, 32776, 0, 13200, 32776, 0, 13216, 32776, 0, 13216, 32776, 0, 14864, 49152, 0, 14864, 49152, 0, 14880, 49152, 0, 14880, 49152, 0, 14896, 49152, 0, 14896, 49152, 0, 15312, 63488, 0, 15312, 63488, 0, 15312, 63488, 0, 15312, 63488, 0, 15312, 63488, 0, 15328, 63488, 0, 15328, 63488, 0, 15328, 63488, 0, 15328, 63488, 0, 15328, 63488, 0, 15344, 63488, 0, 15344, 63488, 0, 15344, 63488, 0, 15344, 63488, 0, 15344, 63488, 0, 16128, 85, 0, 16128, 85, 0, 16128, 85, 0, 16128, 85, 0, 18496, 32768, 0, 19776, 32768, 0, 24320, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580590725996831_743_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580590725996831_743_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..76562866 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580590725996831_743_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,521 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + case 1: { + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (282 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14))) { + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (303 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (329 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (357 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((380 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((394 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (418 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((444 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (455 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: 
{ + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (464 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (469 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (476 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 5376, 256, 0, 7616, 288, 0, 7616, 288, 0, 18048, 16389, 0, 18048, 16389, 0, 18048, 16389, 0, 20352, 16384, 0, 21056, 5120, 0, 21056, 5120, 0, 24320, 128, 0, 24336, 128, 0, 26752, 1, 0, 28432, 1, 0, 28448, 1, 0, 29120, 1, 0, 30016, 26214, 0, 30016, 26214, 0, 30016, 26214, 0, 30016, 26214, 0, 30016, 26214, 0, 30016, 26214, 0, 30016, 26214, 0, 30016, 26214, 0, 30464, 34952, 0, 30464, 34952, 0, 30464, 34952, 0, 30464, 34952, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 5376, 256, 0, 7616, 288, 0, 7616, 288, 0, 18048, 16389, 0, 18048, 16389, 0, 18048, 16389, 0, 20352, 16384, 0, 21056, 5120, 0, 21056, 5120, 0, 24320, 128, 0, 24336, 128, 0, 26752, 1, 0, 
28432, 1, 0, 28448, 1, 0, 29120, 1, 0, 30016, 26214, 0, 30016, 26214, 0, 30016, 26214, 0, 30016, 26214, 0, 30016, 26214, 0, 30016, 26214, 0, 30016, 26214, 0, 30016, 26214, 0, 30464, 34952, 0, 30464, 34952, 0, 30464, 34952, 0, 30464, 34952, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize16BitTracking/tests/program_1756580593045563650_744_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580593045563650_744_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..10a0ee73
--- /dev/null
+++ b/test/WaveSize16BitTracking/tests/program_1756580593045563650_744_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,193 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0);  // log of 3-word records: [tag, ballot.x, ballot.y]
+RWStructuredBuffer<uint> _wave_op_index : register(u1);  // element 0 is the shared write cursor into _participant_bit
+
+[numthreads(32, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) {
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 4)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) {
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);  // atomically claim 3 slots; temp = cursor value before the add
+      _participant_bit[temp] = (9 << 6);  // word 0: tag 9 in bits 6 and up, low bits zero (no enclosing loop)
+      uint4 ballot = WaveActiveBallot(1);  // bitmask of the lanes active at this point
+      _participant_bit[(temp + 1)] = ballot.x;  // word 1: mask for lanes 0-31
+      _participant_bit[(temp + 2)] = ballot.y;  // word 2: mask for lanes 32-63
+    }
+    break;
+  }
+  case 1: {
+    if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) {
+      if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) {  // only lane 5 satisfies both tests
+        result = (result + WaveActiveMax(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (35 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  case 2: {
+    if (true) {
+      result = (result + WaveActiveSum(3));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (40 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 3: {
+    switch ((WaveGetLaneIndex() % 3)) {
+    case 0: {
+      for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) {
+        if ((WaveGetLaneIndex() < 6)) {
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((58 << 6) | (i0 << 4));  // tag also carries the outer loop index, shifted to bit 4
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) {
+          if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 4))) {
+            result = (result + WaveActiveSum((WaveGetLaneIndex() + 4)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((83 << 6) | (i0 << 4)) | (i1 << 2));  // inner loop index goes in at bit 2
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) {
+            result = (result + WaveActiveMin((WaveGetLaneIndex() + 1)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((108 << 6) | (i0 << 4)) | (i1 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((WaveGetLaneIndex() < 7)) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((115 << 6) | (i0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) {  // never true here: the enclosing case 3 of (lane % 4) admits only odd lanes
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (124 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 2: {
+      uint counter2 = 0;
+      while ((counter2 < 3)) {
+        counter2 = (counter2 + 1);  // incremented before use, so the tags below see 1, 2, 3
+        for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) {
+          if (((WaveGetLaneIndex() & 1) == 1)) {
+            result = (result + WaveActiveMax(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((148 << 6) | (counter2 << 4)) | (i3 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if (((WaveGetLaneIndex() & 1) == 0)) {  // never true here: only odd lanes reach case 3 of (lane % 4)
+            result = (result + WaveActiveSum(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((157 << 6) | (counter2 << 4)) | (i3 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((WaveGetLaneIndex() == 9)) {  // never true here: 9 % 4 == 1, so lane 9 is not in case 3
+          result = (result + WaveActiveMin((WaveGetLaneIndex() + 4)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((166 << 6) | (counter2 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    }
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 32 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 126
+  - Name: expected_bit_patterns  # (tag, ballot.x, ballot.y) triples; the record list appears twice - presumably once per wave, confirm
+    Format: UInt32
+    Stride: 4
+    Data: [576, 17, 0, 576, 17, 0, 2240, 32, 0, 2560, 17476, 0, 2560, 17476, 0, 2560, 17476, 0, 2560, 17476, 0, 3712, 8, 0, 3728, 8, 0, 6912, 8, 0, 6916, 8, 0, 6928, 8, 0, 6932, 8, 0, 7360, 8, 0, 7376, 8, 0, 9488, 2048, 0, 9492, 2048, 0, 9504, 2048, 0, 9508, 2048, 0, 9520, 2048, 0, 9524, 2048, 0, 576, 17, 0, 576, 17, 0, 2240, 32, 0, 2560, 17476, 0, 2560, 17476, 0, 2560, 17476, 0, 2560, 17476, 0, 3712, 8, 0, 3728, 8, 0, 6912, 8, 0, 6916, 8, 0, 6928, 8, 0, 6932, 8, 0, 7360, 8, 0, 7376, 8, 0, 9488, 2048, 0, 9492, 2048, 0, 9504, 2048, 0, 9508, 2048, 0, 9520, 2048, 0, 9524, 2048, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize16BitTracking/tests/program_1756580593540418616_745_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580593540418616_745_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..43bb399d
--- /dev/null
+++ b/test/WaveSize16BitTracking/tests/program_1756580593540418616_745_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,76 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0);  // log of 3-word records: [tag, ballot.x, ballot.y]
+RWStructuredBuffer<uint> _wave_op_index : register(u1);  // element 0 is the shared write cursor into _participant_bit
+
+[numthreads(32, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) {
+  uint result = 0;
+  if (((WaveGetLaneIndex() & 1) == 1)) {  // odd lanes
+    result = (result + WaveActiveSum(1));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp);  // atomically claim 3 slots; temp = cursor value before the add
+    _participant_bit[temp] = (17 << 6);  // word 0: tag 17 in bits 6 and up, low bits zero (no enclosing loop)
+    uint4 ballot = WaveActiveBallot(1);  // bitmask of the lanes active at this point
+    _participant_bit[(temp + 1)] = ballot.x;  // word 1: mask for lanes 0-31
+    _participant_bit[(temp + 2)] = ballot.y;  // word 2: mask for lanes 32-63
+  } else {
+    if (((WaveGetLaneIndex() & 1) == 0)) {  // always true in this branch: the else side holds exactly the even lanes
+      result = (result + WaveActiveMin(2));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (13 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 32 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 96
+  - Name: expected_bit_patterns  # (tag, ballot.x, ballot.y) triples; the record list appears twice - presumably once per wave, confirm
+    Format: UInt32
+    Stride: 4
+    Data: [1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690,
0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 1088, 43690, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize16BitTracking/tests/program_1756580593648501467_746_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580593648501467_746_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..87cd25f4
--- /dev/null
+++ b/test/WaveSize16BitTracking/tests/program_1756580593648501467_746_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,83 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0);  // log of 3-word records: [tag, ballot.x, ballot.y]
+RWStructuredBuffer<uint> _wave_op_index : register(u1);  // element 0 is the shared write cursor into _participant_bit
+
+[numthreads(32, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) {
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 2)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) {  // together with case 0: even lanes 0, 2, 4, 6
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);  // atomically claim 3 slots; temp = cursor value before the add
+      _participant_bit[temp] = (9 << 6);  // word 0: tag 9 in bits 6 and up, low bits zero (no enclosing loop)
+      uint4 ballot = WaveActiveBallot(1);  // bitmask of the lanes active at this point
+      _participant_bit[(temp + 1)] = ballot.x;  // word 1: mask for lanes 0-31
+      _participant_bit[(temp + 2)] = ballot.y;  // word 2: mask for lanes 32-63
+    }
+    break;
+  }
+  case 1: {
+    if (((WaveGetLaneIndex() % 2) == 0)) {  // never true here: case 1 holds only odd lanes, so tag 18 is never logged
+      result = (result + WaveActiveSum(2));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (18 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 32 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 24
+  - Name: expected_bit_patterns  # (tag, ballot.x, ballot.y) triples; 8 identical records - presumably 4 per wave, confirm
+    Format: UInt32
+    Stride: 4
+    Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize16BitTracking/tests/program_1756580593752844279_747_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580593752844279_747_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..64dacf7d
--- /dev/null
+++ b/test/WaveSize16BitTracking/tests/program_1756580593752844279_747_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,188 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0);  // log of 3-word records: [tag, ballot.x, ballot.y]
+RWStructuredBuffer<uint> _wave_op_index : register(u1);  // element 0 is the shared write cursor into _participant_bit
+
+[numthreads(32, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) {
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 3)) {
+  case 0: {
+    if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) {
+      for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) {
+        uint counter1 = 0;
+        while ((counter1 < 2)) {
+          counter1 = (counter1 + 1);  // incremented before use, so the tags below see 1, 2
+          if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14))) {  // never true here: the enclosing if excludes lanes 2 through 14
+            result = (result + WaveActiveMin(5));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);  // atomically claim 3 slots; temp = cursor value before the add
+            _participant_bit[temp] = (((35 << 6) | (i0 << 4)) | (counter1 << 2));  // word 0: tag 35 in bits 6 and up, outer index at bit 4, inner counter at bit 2
+            uint4 ballot = WaveActiveBallot(1);  // bitmask of the lanes active at this point
+            _participant_bit[(temp + 1)] = ballot.x;  // word 1: mask for lanes 0-31
+            _participant_bit[(temp + 2)] = ballot.y;  // word 2: mask for lanes 32-63
+          }
+          if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12))) {  // never true here, for the same reason
+            result = (result + WaveActiveSum(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((46 << 6) | (i0 << 4)) | (counter1 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) {
+          result = (result + WaveActiveMin((WaveGetLaneIndex() + 5)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((59 << 6) | (i0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((i0 == 1)) {
+          continue;
+        }
+        if ((i0 == 1)) {  // unreachable body: the continue above already fires whenever i0 == 1
+          break;
+        }
+      }
+      if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) {
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (76 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    } else {
+      for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) {
+        if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) {
+          result = (result + WaveActiveMax(4));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((103 << 6) | (i2 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) {
+          if ((WaveGetLaneIndex() == 5)) {  // never true here: 5 % 3 == 2, so lane 5 is not in case 0
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((118 << 6) | (i2 << 4)) | (i3 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((i2 == 1)) {
+          continue;
+        }
+      }
+      if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12))) {
+        result = (result + WaveActiveMin(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (136 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  case 1: {
+    if (((WaveGetLaneIndex() % 2) == 0)) {
+      result = (result + WaveActiveSum(2));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (145 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 2: {
+    switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (155 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) {  // never true here: case 1 holds only odd lanes
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (164 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    }
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 32 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 60
+  - Name: expected_bit_patterns  # (tag, ballot.x, ballot.y) triples; the record list appears twice - presumably once per wave, confirm
+    Format: UInt32
+    Stride: 4
+    Data: [3776, 32769, 0, 3776, 32769, 0, 3792, 32769, 0, 3792, 32769, 0, 4864, 32769, 0, 4864, 32769, 0, 8704, 4096, 0, 9280, 1040, 0, 9280, 1040, 0, 9920, 4, 0, 3776, 32769, 0, 3776, 32769, 0, 3792, 32769, 0, 3792, 32769, 0, 4864, 32769, 0, 4864, 32769, 0, 8704, 4096, 0, 9280, 1040, 0, 9280, 1040, 0, 9920, 4, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize16BitTracking/tests/program_1756580594708208040_748_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580594708208040_748_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..ac54cc32
--- /dev/null
+++ b/test/WaveSize16BitTracking/tests/program_1756580594708208040_748_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,165 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0);  // log of 3-word records: [tag, ballot.x, ballot.y]
+RWStructuredBuffer<uint> _wave_op_index : register(u1);  // element 0 is the shared write cursor into _participant_bit
+
+[numthreads(32, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) {
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 3)) {
+  case 0: {
+    for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) {
+      if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 8))) {
+        result = (result + WaveActiveMin(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);  // atomically claim 3 slots; temp = cursor value before the add
+        _participant_bit[temp] = ((33 << 6) | (i0 << 4));  // word 0: tag 33 in bits 6 and up, loop index at bit 4
+        uint4 ballot = WaveActiveBallot(1);  // bitmask of the lanes active at this point
+        _participant_bit[(temp + 1)] = ballot.x;  // word 1: mask for lanes 0-31
+        _participant_bit[(temp + 2)] = ballot.y;  // word 2: mask for lanes 32-63
+      }
+      if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) {
+        if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) {
+          result = (result + WaveActiveMax(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((51 << 6) | (i0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) {
+        result = (result + WaveActiveMax(5));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((74 << 6) | (i0 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if ((i0 == 1)) {  // fires at the end of the last iteration, so both iterations still run in full
+        break;
+      }
+    }
+    break;
+  }
+  case 1: {
+    uint counter1 = 0;
+    while ((counter1 < 2)) {
+      counter1 = (counter1 + 1);  // incremented before use, so the tags below see 1, 2
+      if (((WaveGetLaneIndex() & 1) == 0)) {
+        result = (result + WaveActiveMin(WaveGetLaneIndex()));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((93 << 6) | (counter1 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      uint counter2 = 0;
+      while ((counter2 < 2)) {
+        counter2 = (counter2 + 1);
+        if ((WaveGetLaneIndex() >= 12)) {
+          result = (result + WaveActiveSum(6));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((107 << 6) | (counter1 << 4)) | (counter2 << 2));  // outer counter at bit 4, inner counter at bit 2
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((WaveGetLaneIndex() >= 12)) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((114 << 6) | (counter1 << 4)) | (counter2 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if (((WaveGetLaneIndex() & 1) == 0)) {
+        result = (result + WaveActiveMax(WaveGetLaneIndex()));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((123 << 6) | (counter1 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  case 2: {
+    if (true) {
+      result = (result + WaveActiveSum(3));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (128 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  default: {  // not reached: an unsigned value % 3 is always 0, 1 or 2
+    result = (result + WaveActiveSum(99));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp);
+    _participant_bit[temp] = (132 << 6);
+    uint4 ballot = WaveActiveBallot(1);
+    _participant_bit[(temp + 1)] = ballot.x;
+    _participant_bit[(temp + 2)] = ballot.y;
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 32 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 210
+  - Name: expected_bit_patterns  # (tag, ballot.x, ballot.y) triples; the record list appears twice - presumably once per wave, confirm
+    Format: UInt32
+    Stride: 4
+    Data: [2112, 33344, 0, 2112, 33344, 0, 2112, 33344, 0, 2128, 33344, 0, 2128, 33344, 0, 2128, 33344, 0, 3264, 32769, 0, 3264, 32769, 0, 3280, 32769, 0, 3280, 32769, 0, 4736, 72, 0, 4736, 72, 0, 4752, 72, 0, 4752, 72, 0, 5968, 1040, 0, 5968, 1040, 0, 5984, 1040, 0, 5984, 1040, 0, 6868, 8192, 0, 6872, 8192, 0, 6884, 8192, 0, 6888, 8192, 0, 7316, 8192, 0, 7320, 8192, 0, 7332, 8192, 0, 7336, 8192, 0, 7888, 1040, 0, 7888, 1040, 0, 7904, 1040, 0, 7904, 1040, 0, 8192, 18724, 0, 8192, 18724, 0, 8192, 18724, 0, 8192, 18724, 0, 8192, 18724, 0, 2112, 33344, 0, 2112, 33344, 0, 2112, 33344, 0, 2128, 33344, 0, 2128, 33344, 0, 2128, 33344, 0, 3264, 32769, 0, 3264, 32769, 0, 3280, 32769, 0, 3280, 32769, 0, 4736, 72, 0, 4736, 72, 0, 4752, 72, 0, 4752, 72, 0, 5968, 1040, 0, 5968, 1040, 0, 5984, 1040, 0, 5984, 1040, 0, 6868, 8192, 0, 6872, 8192, 0, 6884, 8192, 0, 6888, 8192, 0, 7316, 8192, 0, 7320, 8192, 0, 7332, 8192, 0, 7336, 8192, 0, 7888, 1040, 0, 7888, 1040, 0, 7904, 1040, 0, 7904, 1040, 0, 8192, 18724, 0, 8192, 18724, 0, 8192, 18724, 0, 8192, 18724, 0, 8192, 18724, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual:
_participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize16BitTracking/tests/program_1756580597085219389_749_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580597085219389_749_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..774956da
--- /dev/null
+++ b/test/WaveSize16BitTracking/tests/program_1756580597085219389_749_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,155 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0);  // log of 3-word records: [tag, ballot.x, ballot.y]
+RWStructuredBuffer<uint> _wave_op_index : register(u1);  // element 0 is the shared write cursor into _participant_bit
+
+[numthreads(32, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) {
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 2)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) {  // together with case 0: even lanes 0, 2, 4, 6
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);  // atomically claim 3 slots; temp = cursor value before the add
+      _participant_bit[temp] = (9 << 6);  // word 0: tag 9 in bits 6 and up, low bits zero (no enclosing loop)
+      uint4 ballot = WaveActiveBallot(1);  // bitmask of the lanes active at this point
+      _participant_bit[(temp + 1)] = ballot.x;  // word 1: mask for lanes 0-31
+      _participant_bit[(temp + 2)] = ballot.y;  // word 2: mask for lanes 32-63
+    }
+    break;
+  }
+  case 1: {
+    switch ((WaveGetLaneIndex() % 3)) {
+    case 0: {
+      if (((WaveGetLaneIndex() & 1) == 0)) {  // never true here: the enclosing case 1 holds only odd lanes
+        if (((WaveGetLaneIndex() & 1) == 1)) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (26 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    case 1: {
+      if ((WaveGetLaneIndex() == 15)) {  // never true here: 15 % 3 == 0, so lane 15 is not in this case
+        if ((WaveGetLaneIndex() == 8)) {
+          result = (result + WaveActiveMax(9));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (36 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((WaveGetLaneIndex() == 2)) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (43 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      } else {
+        if (((WaveGetLaneIndex() & 1) == 0)) {  // never true here: only odd lanes reach this switch
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (52 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() & 1) == 1)) {  // always true here: every lane in this switch is odd
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (61 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    case 2: {
+      if ((WaveGetLaneIndex() == 8)) {  // never true here: lane 8 is even and so never enters the outer case 1
+        if ((WaveGetLaneIndex() == 12)) {
+          result = (result + WaveActiveMax(2));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (71 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((WaveGetLaneIndex() == 11)) {
+          result = (result + WaveActiveMax((WaveGetLaneIndex() + 2)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (80 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    }
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  -
Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3904, 8322, 0, 3904, 8322, 0, 3904, 8322, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3904, 8322, 0, 3904, 8322, 0, 3904, 8322, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580601338684383_751_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580601338684383_751_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..88545fb5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580601338684383_751_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580601459888351_752_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580601459888351_752_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1d97be2a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580601459888351_752_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,161 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + 
if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((counter0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + 
Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2320, 49153, 0, 2320, 49153, 0, 2320, 49153, 0, 3152, 1, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2320, 49153, 0, 2320, 49153, 0, 2320, 49153, 0, 3152, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580641800650057_756_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580641800650057_756_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3727781e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580641800650057_756_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,81 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) 
{ + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((30 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 306 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 32768, 0, 912, 32768, 0, 928, 32768, 0, 1924, 21845, 0, 1924, 21845, 0, 1924, 21845, 0, 1924, 21845, 0, 1924, 21845, 0, 1924, 21845, 0, 1924, 21845, 0, 1924, 21845, 0, 1928, 21845, 0, 1928, 21845, 0, 1928, 21845, 0, 1928, 21845, 0, 1928, 21845, 0, 1928, 21845, 0, 1928, 21845, 0, 1928, 21845, 0, 1940, 21845, 0, 1940, 21845, 0, 1940, 21845, 0, 1940, 21845, 0, 1940, 21845, 0, 1940, 21845, 0, 1940, 21845, 0, 1940, 21845, 0, 1944, 21845, 0, 1944, 21845, 0, 1944, 21845, 0, 1944, 21845, 0, 1944, 21845, 0, 1944, 21845, 0, 1944, 21845, 0, 1944, 21845, 0, 1956, 21845, 0, 1956, 21845, 0, 1956, 21845, 0, 1956, 21845, 0, 1956, 21845, 0, 1956, 21845, 0, 1956, 21845, 0, 1956, 21845, 0, 1960, 21845, 0, 1960, 21845, 0, 1960, 21845, 0, 1960, 21845, 0, 1960, 21845, 0, 1960, 21845, 0, 1960, 21845, 0, 1960, 21845, 0, 896, 32768, 0, 912, 32768, 0, 928, 32768, 0, 1924, 21845, 0, 1924, 21845, 0, 1924, 21845, 0, 1924, 21845, 0, 1924, 21845, 0, 1924, 21845, 0, 1924, 21845, 0, 1924, 21845, 0, 1928, 21845, 0, 1928, 21845, 0, 1928, 21845, 0, 1928, 21845, 0, 1928, 21845, 0, 1928, 21845, 0, 1928, 21845, 0, 1928, 21845, 0, 1940, 21845, 0, 1940, 21845, 0, 1940, 21845, 0, 1940, 21845, 0, 1940, 21845, 0, 1940, 21845, 0, 1940, 21845, 0, 1940, 21845, 0, 1944, 21845, 0, 1944, 21845, 0, 1944, 21845, 0, 1944, 21845, 0, 1944, 21845, 0, 1944, 21845, 0, 1944, 21845, 0, 1944, 21845, 0, 1956, 21845, 0, 1956, 
21845, 0, 1956, 21845, 0, 1956, 21845, 0, 1956, 21845, 0, 1956, 21845, 0, 1956, 21845, 0, 1956, 21845, 0, 1960, 21845, 0, 1960, 21845, 0, 1960, 21845, 0, 1960, 21845, 0, 1960, 21845, 0, 1960, 21845, 0, 1960, 21845, 0, 1960, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580643126030073_757_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580643126030073_757_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..54954119 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580643126030073_757_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,656 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 4))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 
0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (295 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((321 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 
12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (340 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((362 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 13))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((391 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((400 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((405 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((409 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((426 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((443 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((452 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + if ((i4 == 1)) { + break; + } + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((469 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + break; + 
} + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (477 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (495 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (505 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (515 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (524 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (529 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 2: { + if ((WaveGetLaneIndex() == 3)) { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((565 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (572 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (587 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((601 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((610 << 6) 
| (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter6 == 1)) { + break; + } + } + } + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (624 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (631 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((646 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((672 << 6) | (i7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((698 << 6) | (i7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((707 << 6) | (i7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((728 << 6) | (i7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter8 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((738 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 396 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4096, 64, 0, 5504, 1025, 0, 5504, 1025, 0, 9280, 1, 0, 9296, 1, 0, 9728, 4, 0, 9744, 4, 0, 10176, 16384, 0, 10192, 16384, 0, 15104, 64, 0, 25616, 1024, 0, 28368, 130, 0, 28368, 
130, 0, 28372, 130, 0, 28372, 130, 0, 28944, 130, 0, 28944, 130, 0, 28948, 130, 0, 28948, 130, 0, 30032, 2, 0, 30528, 18724, 0, 30528, 18724, 0, 30528, 18724, 0, 30528, 18724, 0, 30528, 18724, 0, 31680, 32769, 0, 31680, 32769, 0, 32320, 1, 0, 32960, 1, 0, 37568, 4096, 0, 38480, 1, 0, 39056, 8192, 0, 39936, 32769, 0, 39936, 32769, 0, 41344, 3584, 0, 41344, 3584, 0, 41344, 3584, 0, 41360, 3584, 0, 41360, 3584, 0, 41360, 3584, 0, 43012, 2082, 0, 43012, 2082, 0, 43012, 2082, 0, 43028, 2082, 0, 43028, 2082, 0, 43028, 2082, 0, 44676, 1040, 0, 44676, 1040, 0, 44692, 1040, 0, 44692, 1040, 0, 45252, 2602, 0, 45252, 2602, 0, 45252, 2602, 0, 45252, 2602, 0, 45252, 2602, 0, 45268, 2602, 0, 45268, 2602, 0, 45268, 2602, 0, 45268, 2602, 0, 45268, 2602, 0, 46596, 74, 0, 46596, 74, 0, 46596, 74, 0, 46612, 74, 0, 46612, 74, 0, 46612, 74, 0, 4096, 64, 0, 5504, 1025, 0, 5504, 1025, 0, 9280, 1, 0, 9296, 1, 0, 9728, 4, 0, 9744, 4, 0, 10176, 16384, 0, 10192, 16384, 0, 15104, 64, 0, 25616, 1024, 0, 28368, 130, 0, 28368, 130, 0, 28372, 130, 0, 28372, 130, 0, 28944, 130, 0, 28944, 130, 0, 28948, 130, 0, 28948, 130, 0, 30032, 2, 0, 30528, 18724, 0, 30528, 18724, 0, 30528, 18724, 0, 30528, 18724, 0, 30528, 18724, 0, 31680, 32769, 0, 31680, 32769, 0, 32320, 1, 0, 32960, 1, 0, 37568, 4096, 0, 38480, 1, 0, 39056, 8192, 0, 39936, 32769, 0, 39936, 32769, 0, 41344, 3584, 0, 41344, 3584, 0, 41344, 3584, 0, 41360, 3584, 0, 41360, 3584, 0, 41360, 3584, 0, 43012, 2082, 0, 43012, 2082, 0, 43012, 2082, 0, 43028, 2082, 0, 43028, 2082, 0, 43028, 2082, 0, 44676, 1040, 0, 44676, 1040, 0, 44692, 1040, 0, 44692, 1040, 0, 45252, 2602, 0, 45252, 2602, 0, 45252, 2602, 0, 45252, 2602, 0, 45252, 2602, 0, 45268, 2602, 0, 45268, 2602, 0, 45268, 2602, 0, 45268, 2602, 0, 45268, 2602, 0, 46596, 74, 0, 46596, 74, 0, 46596, 74, 0, 46612, 74, 0, 46612, 74, 0, 46612, 74, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + 
GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580704973846863_760_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580704973846863_760_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..65999d57 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580704973846863_760_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,173 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10))) { + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4032, 16, 0, 4672, 2, 0, 4032, 16, 0, 4672, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580705145536697_761_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580705145536697_761_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..afa84be1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580705145536697_761_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,380 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((39 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) 
| (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if 
(((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 
14)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (292 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((325 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((339 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((354 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (364 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (373 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (407 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (417 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (426 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3472, 4096, 0, 3488, 4096, 0, 5376, 128, 0, 10304, 16, 0, 11520, 1040, 0, 11520, 1040, 
0, 18112, 16644, 0, 18112, 16644, 0, 18112, 16644, 0, 19328, 73, 0, 19328, 73, 0, 19328, 73, 0, 20800, 144, 0, 20800, 144, 0, 20816, 144, 0, 20816, 144, 0, 21700, 2, 0, 21704, 2, 0, 21708, 2, 0, 21716, 2, 0, 21720, 2, 0, 21724, 2, 0, 22656, 1024, 0, 22672, 1024, 0, 23872, 1040, 0, 23872, 1040, 0, 26048, 16384, 0, 26688, 85, 0, 26688, 85, 0, 26688, 85, 0, 26688, 85, 0, 3472, 4096, 0, 3488, 4096, 0, 5376, 128, 0, 10304, 16, 0, 11520, 1040, 0, 11520, 1040, 0, 18112, 16644, 0, 18112, 16644, 0, 18112, 16644, 0, 19328, 73, 0, 19328, 73, 0, 19328, 73, 0, 20800, 144, 0, 20800, 144, 0, 20816, 144, 0, 20816, 144, 0, 21700, 2, 0, 21704, 2, 0, 21708, 2, 0, 21716, 2, 0, 21720, 2, 0, 21724, 2, 0, 22656, 1024, 0, 22672, 1024, 0, 23872, 1040, 0, 23872, 1040, 0, 26048, 16384, 0, 26688, 85, 0, 26688, 85, 0, 26688, 85, 0, 26688, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580723317923150_762_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580723317923150_762_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4f698bf9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580723317923150_762_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,229 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # 
Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 4416, 8744, 0, 4416, 8744, 0, 4416, 8744, 0, 4416, 8744, 0, 4160, 56321, 0, 4160, 56321, 0, 4160, 56321, 0, 4160, 56321, 0, 4160, 56321, 0, 4160, 56321, 0, 3776, 22, 0, 3776, 22, 0, 3776, 22, 0, 5056, 17, 0, 5056, 17, 0, 5952, 17476, 0, 5952, 17476, 0, 5952, 17476, 0, 5952, 17476, 0, 7296, 32776, 0, 7296, 32776, 0, 9792, 128, 0, 10112, 2048, 0, 576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 4416, 8744, 0, 4416, 8744, 0, 4416, 8744, 0, 4416, 8744, 0, 4160, 56321, 0, 4160, 56321, 0, 4160, 56321, 0, 4160, 56321, 0, 4160, 56321, 0, 4160, 56321, 0, 3776, 22, 0, 3776, 22, 0, 3776, 22, 0, 5056, 17, 0, 5056, 17, 0, 5952, 17476, 0, 5952, 17476, 0, 5952, 17476, 0, 5952, 17476, 0, 7296, 32776, 0, 7296, 32776, 0, 9792, 128, 0, 10112, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580723789880560_763_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580723789880560_763_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87cd25f4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580723789880560_763_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580746886122915_765_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580746886122915_765_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..00d2abb9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580746886122915_765_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2048, 36992, 0, 2048, 36992, 0, 2048, 36992, 0, 1664, 32, 0, 1408, 17749, 0, 1408, 17749, 0, 1408, 17749, 0, 1408, 17749, 0, 1408, 17749, 0, 1408, 17749, 0, 1408, 17749, 0, 2048, 36992, 0, 2048, 36992, 0, 2048, 36992, 0, 1664, 32, 0, 1408, 17749, 0, 1408, 17749, 0, 1408, 17749, 0, 1408, 17749, 0, 1408, 17749, 0, 1408, 17749, 0, 1408, 17749, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580749933747222_767_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580749933747222_767_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ce175c7e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580749933747222_767_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,278 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 
3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((151 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 10)) { + 
result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((210 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + 
Format: UInt32 + Stride: 4 + Data: [4800, 16, 0, 7552, 8192, 0, 9680, 8192, 0, 9684, 8192, 0, 9688, 8192, 0, 9696, 8192, 0, 9700, 8192, 0, 9704, 8192, 0, 11200, 8192, 0, 13460, 2, 0, 13464, 2, 0, 13476, 2, 0, 13480, 2, 0, 13492, 2, 0, 13496, 2, 0, 14096, 512, 0, 14112, 512, 0, 14128, 512, 0, 14400, 17476, 0, 14400, 17476, 0, 14400, 17476, 0, 14400, 17476, 0, 14848, 34952, 0, 14848, 34952, 0, 14848, 34952, 0, 14848, 34952, 0, 4800, 16, 0, 7552, 8192, 0, 9680, 8192, 0, 9684, 8192, 0, 9688, 8192, 0, 9696, 8192, 0, 9700, 8192, 0, 9704, 8192, 0, 11200, 8192, 0, 13460, 2, 0, 13464, 2, 0, 13476, 2, 0, 13480, 2, 0, 13492, 2, 0, 13496, 2, 0, 14096, 512, 0, 14112, 512, 0, 14128, 512, 0, 14400, 17476, 0, 14400, 17476, 0, 14400, 17476, 0, 14400, 17476, 0, 14848, 34952, 0, 14848, 34952, 0, 14848, 34952, 0, 14848, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580756946103131_769_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580756946103131_769_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6c53c375 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580756946103131_769_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,173 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + result = 
(result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 2048, 73, 0, 2048, 73, 0, 2048, 73, 0, 2624, 1040, 0, 2624, 1040, 0, 6480, 4, 0, 6496, 4, 0, 6512, 4, 0, 7184, 2052, 0, 7184, 2052, 0, 7200, 2052, 0, 7200, 2052, 0, 7216, 2052, 0, 7216, 2052, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 2048, 73, 0, 2048, 73, 0, 2048, 73, 0, 2624, 1040, 0, 2624, 1040, 0, 6480, 4, 0, 6496, 4, 0, 6512, 4, 0, 7184, 2052, 0, 7184, 2052, 0, 7200, 2052, 0, 7200, 2052, 0, 7216, 2052, 0, 7216, 2052, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: 
_wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580763174086362_771_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580763174086362_771_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f1c13d54 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580763174086362_771_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,432 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 13)) { 
+ result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((117 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((135 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((142 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 2)) { + break; + } + } + } + case 2: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || 
(WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (290 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (294 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((339 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 4))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || 
(WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((383 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((398 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (413 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (420 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((446 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((463 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((470 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((477 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((496 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter7 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 426 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2752, 1, 0, 2768, 1, 0, 2784, 1, 0, 
4736, 32776, 0, 4736, 32776, 0, 5440, 32777, 0, 5440, 32777, 0, 5440, 32777, 0, 7488, 18, 0, 7488, 18, 0, 7492, 18, 0, 7492, 18, 0, 7504, 18, 0, 7504, 18, 0, 7508, 18, 0, 7508, 18, 0, 7520, 18, 0, 7520, 18, 0, 7524, 18, 0, 7524, 18, 0, 8640, 8194, 0, 8640, 8194, 0, 8644, 8194, 0, 8644, 8194, 0, 8656, 8194, 0, 8656, 8194, 0, 8660, 8194, 0, 8660, 8194, 0, 8672, 8194, 0, 8672, 8194, 0, 8676, 8194, 0, 8676, 8194, 0, 9088, 2, 0, 9092, 2, 0, 9104, 2, 0, 9108, 2, 0, 9120, 2, 0, 9124, 2, 0, 10880, 1056, 0, 10880, 1056, 0, 10896, 1056, 0, 10896, 1056, 0, 10912, 1056, 0, 10912, 1056, 0, 11520, 17, 0, 11520, 17, 0, 18112, 16384, 0, 21712, 4, 0, 21728, 4, 0, 24528, 1024, 0, 24544, 1024, 0, 26880, 34952, 0, 26880, 34952, 0, 26880, 34952, 0, 26880, 34952, 0, 28560, 16457, 0, 28560, 16457, 0, 28560, 16457, 0, 28560, 16457, 0, 28576, 16457, 0, 28576, 16457, 0, 28576, 16457, 0, 28576, 16457, 0, 31760, 18564, 0, 31760, 18564, 0, 31760, 18564, 0, 31760, 18564, 0, 31776, 18564, 0, 31776, 18564, 0, 31776, 18564, 0, 31776, 18564, 0, 2752, 1, 0, 2768, 1, 0, 2784, 1, 0, 4736, 32776, 0, 4736, 32776, 0, 5440, 32777, 0, 5440, 32777, 0, 5440, 32777, 0, 7488, 18, 0, 7488, 18, 0, 7492, 18, 0, 7492, 18, 0, 7504, 18, 0, 7504, 18, 0, 7508, 18, 0, 7508, 18, 0, 7520, 18, 0, 7520, 18, 0, 7524, 18, 0, 7524, 18, 0, 8640, 8194, 0, 8640, 8194, 0, 8644, 8194, 0, 8644, 8194, 0, 8656, 8194, 0, 8656, 8194, 0, 8660, 8194, 0, 8660, 8194, 0, 8672, 8194, 0, 8672, 8194, 0, 8676, 8194, 0, 8676, 8194, 0, 9088, 2, 0, 9092, 2, 0, 9104, 2, 0, 9108, 2, 0, 9120, 2, 0, 9124, 2, 0, 10880, 1056, 0, 10880, 1056, 0, 10896, 1056, 0, 10896, 1056, 0, 10912, 1056, 0, 10912, 1056, 0, 11520, 17, 0, 11520, 17, 0, 18112, 16384, 0, 21712, 4, 0, 21728, 4, 0, 24528, 1024, 0, 24544, 1024, 0, 26880, 34952, 0, 26880, 34952, 0, 26880, 34952, 0, 26880, 34952, 0, 28560, 16457, 0, 28560, 16457, 0, 28560, 16457, 0, 28560, 16457, 0, 28576, 16457, 0, 28576, 16457, 0, 28576, 16457, 0, 28576, 16457, 0, 31760, 18564, 0, 31760, 18564, 0, 31760, 
18564, 0, 31760, 18564, 0, 31776, 18564, 0, 31776, 18564, 0, 31776, 18564, 0, 31776, 18564, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580811820225524_772_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580811820225524_772_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cfa51d11 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580811820225524_772_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,431 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
} else { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 7)) { + 
if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((268 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((282 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((293 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (352 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 
15)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (346 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (340 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (336 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 366 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2112, 4, 0, 2128, 4, 0, 3776, 1024, 0, 3792, 1024, 0, 4096, 4, 0, 4112, 4, 0, 5312, 4, 0, 5328, 4, 0, 5760, 16384, 0, 5776, 16384, 0, 15936, 73, 0, 15936, 73, 0, 15936, 73, 0, 17152, 46081, 0, 17152, 46081, 0, 17152, 46081, 0, 17152, 46081, 0, 17152, 46081, 0, 17168, 46081, 0, 17168, 46081, 0, 17168, 46081, 0, 17168, 46081, 0, 17168, 46081, 0, 18048, 5201, 0, 18048, 5201, 0, 18048, 5201, 0, 18048, 5201, 0, 18048, 5201, 0, 18064, 5201, 0, 18064, 5201, 0, 18064, 5201, 0, 18064, 5201, 0, 18064, 5201, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 
19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 22528, 258, 0, 22528, 258, 0, 22144, 32768, 0, 21760, 16904, 0, 21760, 16904, 0, 21760, 16904, 0, 21504, 10400, 0, 21504, 10400, 0, 21504, 10400, 0, 21504, 10400, 0, 576, 17, 0, 576, 17, 0, 2112, 4, 0, 2128, 4, 0, 3776, 1024, 0, 3792, 1024, 0, 4096, 4, 0, 4112, 4, 0, 5312, 4, 0, 5328, 4, 0, 5760, 16384, 0, 5776, 16384, 0, 15936, 73, 0, 15936, 73, 0, 15936, 73, 0, 17152, 46081, 0, 17152, 46081, 0, 17152, 46081, 0, 17152, 46081, 0, 17152, 46081, 0, 17168, 46081, 0, 17168, 46081, 0, 17168, 46081, 0, 17168, 46081, 0, 17168, 46081, 0, 18048, 5201, 0, 18048, 5201, 0, 18048, 5201, 0, 18048, 5201, 0, 18048, 5201, 0, 18064, 5201, 0, 18064, 5201, 0, 18064, 5201, 0, 18064, 5201, 0, 18064, 5201, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 19072, 65535, 0, 22528, 258, 0, 22528, 258, 0, 22144, 32768, 0, 21760, 16904, 0, 21760, 16904, 0, 21760, 16904, 0, 21504, 10400, 0, 21504, 10400, 0, 21504, 10400, 0, 21504, 10400, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580819782405214_775_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580819782405214_775_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bf93e880 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580819782405214_775_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 65280, 0, 1344, 65280, 0, 1344, 65280, 0, 1344, 65280, 0, 1344, 65280, 0, 1344, 65280, 0, 1344, 65280, 0, 1344, 65280, 0, 960, 31, 0, 960, 31, 0, 960, 31, 0, 960, 31, 0, 960, 31, 0, 1344, 65280, 0, 1344, 65280, 0, 1344, 65280, 0, 1344, 
65280, 0, 1344, 65280, 0, 1344, 65280, 0, 1344, 65280, 0, 1344, 65280, 0, 960, 31, 0, 960, 31, 0, 960, 31, 0, 960, 31, 0, 960, 31, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580819900671614_776_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580819900671614_776_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d707c6f5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580819900671614_776_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,188 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 9))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 12)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 270 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2560, 800, 0, 2560, 800, 0, 2560, 800, 0, 2564, 800, 0, 2564, 800, 0, 2564, 800, 0, 2576, 800, 0, 2576, 800, 0, 2576, 800, 0, 2580, 800, 0, 2580, 800, 0, 2580, 800, 0, 2592, 800, 0, 2592, 800, 0, 2592, 800, 0, 2596, 800, 0, 2596, 800, 0, 2596, 800, 0, 5264, 4617, 0, 5264, 4617, 0, 5264, 4617, 0, 5264, 4617, 0, 5280, 4617, 0, 5280, 4617, 0, 5280, 4617, 0, 5280, 4617, 0, 5296, 4617, 0, 5296, 4617, 0, 5296, 4617, 0, 5296, 4617, 0, 5824, 
1040, 0, 5824, 1040, 0, 6144, 18724, 0, 6144, 18724, 0, 6144, 18724, 0, 6144, 18724, 0, 6144, 18724, 0, 7040, 85, 0, 7040, 85, 0, 7040, 85, 0, 7040, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2560, 800, 0, 2560, 800, 0, 2560, 800, 0, 2564, 800, 0, 2564, 800, 0, 2564, 800, 0, 2576, 800, 0, 2576, 800, 0, 2576, 800, 0, 2580, 800, 0, 2580, 800, 0, 2580, 800, 0, 2592, 800, 0, 2592, 800, 0, 2592, 800, 0, 2596, 800, 0, 2596, 800, 0, 2596, 800, 0, 5264, 4617, 0, 5264, 4617, 0, 5264, 4617, 0, 5264, 4617, 0, 5280, 4617, 0, 5280, 4617, 0, 5280, 4617, 0, 5280, 4617, 0, 5296, 4617, 0, 5296, 4617, 0, 5296, 4617, 0, 5296, 4617, 0, 5824, 1040, 0, 5824, 1040, 0, 6144, 18724, 0, 6144, 18724, 0, 6144, 18724, 0, 6144, 18724, 0, 6144, 18724, 0, 7040, 85, 0, 7040, 85, 0, 7040, 85, 0, 7040, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580824275363070_778_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580824275363070_778_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a596039a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580824275363070_778_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,267 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((286 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 
3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((311 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 168 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2880, 2049, 0, 2880, 2049, 0, 2896, 2049, 0, 2896, 2049, 0, 4096, 1, 0, 4112, 1, 0, 5184, 68, 0, 5184, 68, 0, 5200, 68, 0, 5200, 68, 0, 6976, 16, 0, 6992, 16, 0, 7680, 32771, 0, 7680, 32771, 0, 7680, 32771, 0, 7696, 32771, 0, 7696, 32771, 0, 7696, 32771, 0, 15936, 4, 0, 17408, 16384, 0, 17424, 16384, 0, 17440, 16384, 0, 19904, 5120, 0, 19904, 5120, 0, 19920, 5120, 0, 19920, 5120, 0, 19936, 5120, 0, 19936, 5120, 0, 2880, 2049, 0, 2880, 2049, 0, 2896, 2049, 0, 2896, 2049, 0, 4096, 1, 0, 4112, 1, 0, 5184, 68, 0, 5184, 68, 0, 5200, 68, 0, 5200, 68, 0, 6976, 16, 0, 6992, 16, 0, 7680, 32771, 0, 7680, 32771, 0, 7680, 32771, 0, 7696, 32771, 0, 7696, 32771, 0, 7696, 32771, 0, 15936, 4, 0, 17408, 16384, 0, 17424, 16384, 0, 17440, 16384, 0, 19904, 5120, 0, 19904, 5120, 0, 19920, 5120, 0, 19920, 5120, 0, 19936, 5120, 0, 19936, 5120, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580825862927926_779_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580825862927926_779_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ae99776d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580825862927926_779_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,374 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((80 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((99 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((118 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 14)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((202 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + 
result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (307 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (321 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (326 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (347 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (358 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (369 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (379 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (384 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (388 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3216, 2, 0, 3220, 2, 0, 3232, 2, 0, 3236, 2, 0, 3248, 2, 0, 3252, 2, 0, 8336, 256, 0, 8352, 256, 0, 8368, 256, 0, 9024, 256, 0, 9984, 17408, 0, 9984, 17408, 0, 12928, 16384, 0, 12932, 16384, 0, 12944, 16384, 0, 12948, 16384, 0, 13696, 16384, 0, 13712, 16384, 0, 
14144, 16384, 0, 20544, 32, 0, 20864, 17476, 0, 20864, 17476, 0, 20864, 17476, 0, 20864, 17476, 0, 22208, 32768, 0, 24576, 2048, 0, 3216, 2, 0, 3220, 2, 0, 3232, 2, 0, 3236, 2, 0, 3248, 2, 0, 3252, 2, 0, 8336, 256, 0, 8352, 256, 0, 8368, 256, 0, 9024, 256, 0, 9984, 17408, 0, 9984, 17408, 0, 12928, 16384, 0, 12932, 16384, 0, 12944, 16384, 0, 12948, 16384, 0, 13696, 16384, 0, 13712, 16384, 0, 14144, 16384, 0, 20544, 32, 0, 20864, 17476, 0, 20864, 17476, 0, 20864, 17476, 0, 20864, 17476, 0, 22208, 32768, 0, 24576, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580840177688497_780_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580840177688497_780_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..682b82c8 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580840177688497_780_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,458 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11))) { + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 
1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if 
(((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((191 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((267 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((285 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (295 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 5))) { + result = 
(result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((331 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((354 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (362 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (369 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (379 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (388 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((412 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((427 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (437 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (441 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 426 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [13696, 34, 0, 
13696, 34, 0, 13712, 34, 0, 13712, 34, 0, 13728, 34, 0, 13728, 34, 0, 18240, 16384, 0, 18256, 16384, 0, 18272, 16384, 0, 18880, 34952, 0, 18880, 34952, 0, 18880, 34952, 0, 18880, 34952, 0, 19776, 17, 0, 19776, 17, 0, 21200, 8224, 0, 21200, 8224, 0, 21216, 8224, 0, 21216, 8224, 0, 22672, 32, 0, 22688, 32, 0, 23168, 17476, 0, 23168, 17476, 0, 23168, 17476, 0, 23168, 17476, 0, 23616, 34952, 0, 23616, 34952, 0, 23616, 34952, 0, 23616, 34952, 0, 24256, 17, 0, 24256, 17, 0, 26384, 17476, 0, 26384, 17476, 0, 26384, 17476, 0, 26384, 17476, 0, 26388, 17476, 0, 26388, 17476, 0, 26388, 17476, 0, 26388, 17476, 0, 26392, 17476, 0, 26392, 17476, 0, 26392, 17476, 0, 26392, 17476, 0, 26400, 17476, 0, 26400, 17476, 0, 26400, 17476, 0, 26400, 17476, 0, 26404, 17476, 0, 26404, 17476, 0, 26404, 17476, 0, 26404, 17476, 0, 26408, 17476, 0, 26408, 17476, 0, 26408, 17476, 0, 26408, 17476, 0, 27344, 514, 0, 27344, 514, 0, 27360, 514, 0, 27360, 514, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 13696, 34, 0, 13696, 34, 0, 13712, 34, 0, 13712, 34, 0, 13728, 34, 0, 13728, 34, 0, 18240, 16384, 0, 18256, 16384, 0, 18272, 16384, 0, 18880, 34952, 0, 18880, 34952, 0, 18880, 34952, 0, 18880, 34952, 0, 19776, 17, 0, 19776, 17, 0, 21200, 8224, 0, 21200, 8224, 0, 21216, 8224, 0, 21216, 8224, 0, 22672, 32, 0, 22688, 32, 0, 23168, 17476, 0, 23168, 17476, 0, 23168, 17476, 0, 23168, 17476, 0, 23616, 34952, 0, 23616, 34952, 0, 23616, 34952, 0, 23616, 34952, 0, 24256, 17, 0, 24256, 17, 0, 26384, 17476, 0, 26384, 17476, 0, 26384, 17476, 0, 26384, 17476, 0, 26388, 17476, 0, 26388, 17476, 0, 26388, 17476, 0, 26388, 17476, 0, 26392, 17476, 0, 26392, 17476, 0, 26392, 17476, 0, 26392, 17476, 0, 26400, 17476, 0, 26400, 17476, 0, 26400, 17476, 0, 26400, 17476, 0, 26404, 17476, 0, 26404, 17476, 0, 26404, 17476, 0, 26404, 17476, 0, 26408, 17476, 0, 26408, 
17476, 0, 26408, 17476, 0, 26408, 17476, 0, 27344, 514, 0, 27344, 514, 0, 27360, 514, 0, 27360, 514, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0, 27968, 61166, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580912045812695_782_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580912045812695_782_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8a7fe2b3 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580912045812695_782_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,355 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 
0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 12)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 12)) { + if ((WaveGetLaneIndex() >= 13)) { + 
result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (325 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (340 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (347 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2048, 73, 0, 2048, 73, 0, 2048, 73, 0, 6848, 18724, 0, 6848, 18724, 0, 6848, 18724, 0, 6848, 18724, 0, 6848, 18724, 0, 10304, 1, 0, 14336, 8192, 
0, 14352, 8192, 0, 17344, 16, 0, 18752, 16384, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2048, 73, 0, 2048, 73, 0, 2048, 73, 0, 6848, 18724, 0, 6848, 18724, 0, 6848, 18724, 0, 6848, 18724, 0, 6848, 18724, 0, 10304, 1, 0, 14336, 8192, 0, 14352, 8192, 0, 17344, 16, 0, 18752, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580912538879469_783_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580912538879469_783_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c1f79651 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580912538879469_783_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,515 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if 
(((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 2))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 9)) { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (325 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (360 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (364 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((379 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((393 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((403 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((412 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((416 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((431 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((456 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((463 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + if ((i5 == 1)) { + break; + } + } + break; + } + 
default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (473 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (487 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((509 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((527 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((538 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
(((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((549 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 456 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 4096, 0, 1296, 4096, 0, 1856, 16, 0, 2176, 256, 0, 4432, 17476, 0, 4432, 17476, 0, 4432, 17476, 0, 4432, 17476, 0, 4436, 17476, 0, 4436, 17476, 0, 4436, 17476, 0, 4436, 17476, 0, 4440, 17476, 0, 4440, 17476, 0, 4440, 17476, 0, 4440, 17476, 0, 4448, 17476, 0, 4448, 17476, 0, 4448, 17476, 0, 4448, 17476, 0, 4452, 17476, 0, 4452, 17476, 0, 4452, 17476, 0, 4452, 17476, 0, 4456, 17476, 0, 4456, 17476, 0, 4456, 17476, 0, 4456, 17476, 0, 10240, 8, 0, 11072, 36864, 0, 11072, 36864, 0, 14976, 32768, 0, 16000, 65, 0, 16000, 65, 0, 16640, 65, 0, 16640, 65, 0, 24256, 4, 0, 24272, 4, 0, 29184, 16384, 0, 29200, 16384, 0, 29632, 4, 0, 29648, 4, 0, 31168, 43690, 0, 31168, 43690, 0, 31168, 43690, 0, 31168, 43690, 0, 31168, 43690, 0, 31168, 43690, 0, 31168, 43690, 0, 31168, 43690, 0, 32592, 2, 0, 32608, 2, 0, 33748, 32778, 0, 33748, 32778, 0, 33748, 32778, 0, 33752, 32778, 0, 33752, 32778, 0, 33752, 32778, 0, 33764, 32778, 0, 33764, 32778, 0, 33764, 32778, 0, 33768, 32778, 0, 33768, 32778, 0, 33768, 32778, 0, 34452, 40960, 0, 34452, 40960, 0, 34456, 40960, 0, 34456, 40960, 0, 34468, 40960, 0, 34468, 40960, 0, 34472, 40960, 0, 34472, 40960, 0, 35152, 544, 0, 35152, 544, 0, 35168, 544, 0, 35168, 544, 0, 1280, 4096, 0, 1296, 4096, 0, 1856, 16, 0, 2176, 256, 0, 4432, 17476, 0, 4432, 17476, 0, 4432, 17476, 0, 4432, 17476, 0, 4436, 17476, 0, 
4436, 17476, 0, 4436, 17476, 0, 4436, 17476, 0, 4440, 17476, 0, 4440, 17476, 0, 4440, 17476, 0, 4440, 17476, 0, 4448, 17476, 0, 4448, 17476, 0, 4448, 17476, 0, 4448, 17476, 0, 4452, 17476, 0, 4452, 17476, 0, 4452, 17476, 0, 4452, 17476, 0, 4456, 17476, 0, 4456, 17476, 0, 4456, 17476, 0, 4456, 17476, 0, 10240, 8, 0, 11072, 36864, 0, 11072, 36864, 0, 14976, 32768, 0, 16000, 65, 0, 16000, 65, 0, 16640, 65, 0, 16640, 65, 0, 24256, 4, 0, 24272, 4, 0, 29184, 16384, 0, 29200, 16384, 0, 29632, 4, 0, 29648, 4, 0, 31168, 43690, 0, 31168, 43690, 0, 31168, 43690, 0, 31168, 43690, 0, 31168, 43690, 0, 31168, 43690, 0, 31168, 43690, 0, 31168, 43690, 0, 32592, 2, 0, 32608, 2, 0, 33748, 32778, 0, 33748, 32778, 0, 33748, 32778, 0, 33752, 32778, 0, 33752, 32778, 0, 33752, 32778, 0, 33764, 32778, 0, 33764, 32778, 0, 33764, 32778, 0, 33768, 32778, 0, 33768, 32778, 0, 33768, 32778, 0, 34452, 40960, 0, 34452, 40960, 0, 34456, 40960, 0, 34456, 40960, 0, 34468, 40960, 0, 34468, 40960, 0, 34472, 40960, 0, 34472, 40960, 0, 35152, 544, 0, 35152, 544, 0, 35168, 544, 0, 35168, 544, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580949741040933_784_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580949741040933_784_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9b4631d3 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580949741040933_784_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,240 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() >= 11)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 6528, 34816, 0, 6528, 34816, 0, 6976, 32768, 0, 576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 6528, 34816, 0, 6528, 34816, 0, 6976, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580949934359463_785_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580949934359463_785_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b46400ce --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580949934359463_785_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,164 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((100 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 61471, 0, 1216, 61471, 0, 1216, 61471, 0, 1216, 61471, 0, 1216, 61471, 0, 1216, 61471, 0, 1216, 61471, 0, 1216, 61471, 0, 1216, 61471, 0, 3456, 16, 0, 6420, 40960, 0, 6420, 40960, 0, 6424, 40960, 0, 6424, 40960, 0, 6420, 20481, 0, 6420, 20481, 0, 6420, 20481, 0, 6424, 20481, 0, 6424, 20481, 0, 6424, 20481, 0, 6436, 40960, 0, 6436, 40960, 0, 6440, 40960, 0, 6440, 40960, 0, 6436, 20481, 0, 6436, 20481, 0, 6436, 20481, 0, 6440, 20481, 0, 6440, 20481, 0, 6440, 20481, 0, 7552, 61471, 0, 7552, 61471, 0, 7552, 61471, 0, 7552, 61471, 0, 7552, 61471, 0, 7552, 61471, 0, 7552, 61471, 0, 7552, 61471, 0, 7552, 61471, 0, 1216, 61471, 0, 1216, 61471, 0, 1216, 61471, 0, 1216, 61471, 0, 1216, 61471, 0, 1216, 61471, 0, 1216, 61471, 0, 1216, 61471, 0, 1216, 61471, 0, 3456, 16, 0, 6420, 40960, 0, 6420, 40960, 0, 6424, 40960, 0, 6424, 40960, 0, 6420, 20481, 0, 6420, 20481, 0, 6420, 20481, 0, 6424, 20481, 0, 6424, 20481, 0, 6424, 20481, 0, 6436, 40960, 0, 6436, 40960, 0, 6440, 40960, 0, 6440, 40960, 0, 6436, 20481, 0, 6436, 20481, 0, 6436, 20481, 0, 6440, 20481, 0, 6440, 20481, 0, 6440, 20481, 0, 7552, 61471, 0, 7552, 61471, 0, 7552, 61471, 0, 7552, 61471, 0, 7552, 61471, 0, 7552, 61471, 0, 7552, 61471, 0, 7552, 61471, 0, 7552, 61471, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 
0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580953626060090_786_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580953626060090_786_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e3de013d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580953626060090_786_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,171 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((163 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [14784, 43690, 0, 14784, 43690, 0, 14784, 43690, 0, 14784, 43690, 0, 14784, 43690, 0, 14784, 43690, 0, 14784, 43690, 0, 14784, 43690, 0, 14528, 4096, 0, 14272, 1281, 0, 
14272, 1281, 0, 14272, 1281, 0, 14784, 43690, 0, 14784, 43690, 0, 14784, 43690, 0, 14784, 43690, 0, 14784, 43690, 0, 14784, 43690, 0, 14784, 43690, 0, 14784, 43690, 0, 14528, 4096, 0, 14272, 1281, 0, 14272, 1281, 0, 14272, 1281, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580953870712068_787_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580953870712068_787_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..127b80ea --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580953870712068_787_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,218 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 
= (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter2 == 1)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 32769, 0, 1280, 32769, 0, 2368, 1, 0, 2384, 1, 0, 2400, 1, 0, 4480, 32768, 0, 4496, 32768, 0, 4512, 32768, 0, 5440, 64, 0, 6352, 4096, 0, 8768, 512, 0, 9344, 1040, 0, 9344, 1040, 0, 9664, 18724, 0, 9664, 18724, 0, 9664, 18724, 0, 9664, 18724, 0, 9664, 18724, 0, 10304, 85, 0, 10304, 85, 0, 10304, 85, 0, 10304, 85, 0, 10880, 21845, 0, 10880, 21845, 0, 10880, 21845, 0, 10880, 21845, 0, 10880, 21845, 0, 10880, 21845, 0, 10880, 21845, 0, 10880, 21845, 0, 1280, 32769, 0, 1280, 32769, 0, 2368, 1, 0, 2384, 1, 0, 2400, 1, 0, 4480, 32768, 0, 4496, 32768, 0, 4512, 32768, 0, 5440, 64, 0, 6352, 4096, 0, 8768, 512, 0, 9344, 1040, 0, 9344, 1040, 0, 9664, 18724, 0, 9664, 18724, 0, 9664, 18724, 0, 9664, 18724, 0, 9664, 18724, 0, 10304, 85, 0, 10304, 85, 0, 10304, 85, 0, 10304, 85, 0, 10880, 21845, 0, 10880, 21845, 0, 10880, 21845, 0, 10880, 21845, 0, 10880, 21845, 0, 10880, 21845, 0, 10880, 21845, 0, 10880, 21845, 0] + - Name: _wave_op_index + 
Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580954883850396_788_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580954883850396_788_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9753f7dc --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580954883850396_788_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,110 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 15))) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 5))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7872, 128, 0, 7872, 128, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580965775607112_790_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580965775607112_790_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..db49637e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580965775607112_790_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,135 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 
3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580966072795580_791_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580966072795580_791_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6f65403c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580966072795580_791_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,103 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 49159, 0, 1088, 49159, 0, 1088, 49159, 0, 1088, 49159, 0, 1088, 49159, 0, 2896, 8, 0, 2912, 8, 0, 2928, 8, 0, 5264, 20, 0, 5264, 
20, 0, 5280, 20, 0, 5280, 20, 0, 5296, 20, 0, 5296, 20, 0, 1088, 49159, 0, 1088, 49159, 0, 1088, 49159, 0, 1088, 49159, 0, 1088, 49159, 0, 2896, 8, 0, 2912, 8, 0, 2928, 8, 0, 5264, 20, 0, 5264, 20, 0, 5280, 20, 0, 5280, 20, 0, 5296, 20, 0, 5296, 20, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580966652464040_792_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580966652464040_792_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3c0cef90 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580966652464040_792_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,281 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + 
WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 13)) { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 168 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5648, 16384, 0, 5664, 16384, 0, 9872, 1024, 0, 9888, 1024, 0, 9904, 1024, 0, 10304, 20480, 0, 10304, 20480, 0, 10944, 65, 0, 10944, 65, 0, 11520, 4161, 0, 11520, 4161, 0, 11520, 4161, 0, 11840, 20801, 0, 11840, 20801, 0, 11840, 20801, 0, 11840, 20801, 0, 11840, 20801, 0, 12544, 20480, 0, 12544, 20480, 0, 14016, 49152, 0, 14016, 49152, 0, 14464, 57344, 0, 14464, 57344, 0, 14464, 57344, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5648, 16384, 0, 5664, 16384, 0, 9872, 1024, 0, 9888, 1024, 0, 9904, 1024, 0, 10304, 20480, 0, 10304, 20480, 0, 10944, 65, 0, 10944, 65, 0, 11520, 4161, 0, 11520, 4161, 0, 11520, 4161, 0, 11840, 20801, 0, 11840, 20801, 0, 11840, 20801, 0, 11840, 20801, 0, 11840, 20801, 0, 12544, 20480, 0, 12544, 20480, 0, 14016, 49152, 0, 14016, 49152, 
0, 14464, 57344, 0, 14464, 57344, 0, 14464, 57344, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580967531068022_793_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580967531068022_793_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cc1ad61b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580967531068022_793_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,232 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3152, 1, 0, 3168, 1, 0, 3184, 1, 0, 3728, 1, 0, 3744, 1, 0, 3760, 1, 0, 4432, 4096, 0, 4448, 4096, 0, 4464, 4096, 0, 4992, 4369, 0, 4992, 4369, 0, 4992, 4369, 0, 4992, 4369, 0, 5312, 30583, 0, 5312, 30583, 0, 5312, 30583, 0, 5312, 30583, 0, 5312, 30583, 0, 5312, 30583, 0, 5312, 30583, 0, 5312, 30583, 0, 5312, 30583, 0, 5312, 30583, 0, 5312, 30583, 0, 5312, 30583, 0, 6464, 32768, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3152, 1, 0, 3168, 1, 0, 3184, 1, 0, 3728, 1, 0, 3744, 1, 0, 3760, 1, 0, 4432, 4096, 0, 4448, 4096, 0, 4464, 4096, 0, 4992, 4369, 0, 4992, 4369, 0, 4992, 4369, 0, 4992, 4369, 0, 5312, 30583, 0, 5312, 30583, 0, 5312, 30583, 0, 5312, 30583, 0, 5312, 30583, 0, 5312, 30583, 0, 5312, 30583, 0, 5312, 30583, 0, 5312, 30583, 0, 5312, 30583, 
0, 5312, 30583, 0, 5312, 30583, 0, 6464, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756580969303961873_794_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756580969303961873_794_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..871639be --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756580969303961873_794_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,201 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 9))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = 
(result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 6))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((187 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) 
|| (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((204 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter2 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 396 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2004, 2048, 0, 2008, 2048, 0, 2012, 2048, 0, 2020, 2048, 0, 2024, 2048, 0, 2028, 2048, 0, 2576, 1, 0, 2592, 1, 0, 3968, 512, 0, 4608, 6, 0, 4608, 6, 0, 5248, 4, 0, 5824, 4, 0, 6720, 62464, 0, 6720, 62464, 0, 6720, 62464, 0, 6720, 62464, 0, 6720, 62464, 0, 7632, 57344, 0, 7632, 57344, 0, 7632, 57344, 0, 7648, 57344, 0, 7648, 57344, 0, 7648, 57344, 0, 9360, 2370, 0, 9360, 2370, 0, 9360, 2370, 0, 9360, 2370, 0, 9364, 2370, 0, 9364, 2370, 0, 9364, 2370, 0, 9364, 2370, 0, 9368, 2370, 0, 9368, 2370, 0, 9368, 2370, 0, 9368, 2370, 0, 9376, 2370, 0, 9376, 2370, 0, 9376, 2370, 0, 9376, 2370, 0, 9380, 2370, 0, 9380, 2370, 0, 9380, 2370, 0, 9380, 2370, 0, 9384, 2370, 0, 9384, 2370, 0, 9384, 2370, 0, 9384, 2370, 0, 13072, 9217, 0, 13072, 9217, 0, 13072, 9217, 0, 13076, 9217, 0, 13076, 9217, 0, 13076, 9217, 0, 13080, 9217, 0, 13080, 9217, 0, 13080, 9217, 0, 13088, 9217, 0, 13088, 9217, 0, 13088, 9217, 0, 13092, 9217, 0, 13092, 9217, 0, 13092, 9217, 0, 13096, 9217, 0, 13096, 9217, 0, 13096, 9217, 0, 2004, 2048, 0, 2008, 2048, 0, 2012, 2048, 0, 2020, 2048, 0, 2024, 2048, 0, 2028, 2048, 0, 2576, 1, 0, 2592, 1, 0, 3968, 512, 0, 4608, 6, 0, 4608, 6, 0, 5248, 4, 0, 5824, 4, 0, 6720, 62464, 0, 6720, 62464, 0, 6720, 62464, 0, 6720, 62464, 0, 6720, 62464, 0, 7632, 57344, 0, 7632, 57344, 0, 7632, 57344, 
0, 7648, 57344, 0, 7648, 57344, 0, 7648, 57344, 0, 9360, 2370, 0, 9360, 2370, 0, 9360, 2370, 0, 9360, 2370, 0, 9364, 2370, 0, 9364, 2370, 0, 9364, 2370, 0, 9364, 2370, 0, 9368, 2370, 0, 9368, 2370, 0, 9368, 2370, 0, 9368, 2370, 0, 9376, 2370, 0, 9376, 2370, 0, 9376, 2370, 0, 9376, 2370, 0, 9380, 2370, 0, 9380, 2370, 0, 9380, 2370, 0, 9380, 2370, 0, 9384, 2370, 0, 9384, 2370, 0, 9384, 2370, 0, 9384, 2370, 0, 13072, 9217, 0, 13072, 9217, 0, 13072, 9217, 0, 13076, 9217, 0, 13076, 9217, 0, 13076, 9217, 0, 13080, 9217, 0, 13080, 9217, 0, 13080, 9217, 0, 13088, 9217, 0, 13088, 9217, 0, 13088, 9217, 0, 13092, 9217, 0, 13092, 9217, 0, 13092, 9217, 0, 13096, 9217, 0, 13096, 9217, 0, 13096, 9217, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581000021185245_795_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581000021185245_795_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5ace6502 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581000021185245_795_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,174 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5056, 65, 0, 5056, 65, 0, 6144, 8, 0, 7488, 32768, 0, 8064, 1040, 0, 8064, 1040, 0, 8384, 18724, 0, 8384, 18724, 0, 8384, 18724, 0, 8384, 18724, 0, 8384, 18724, 0, 5056, 65, 0, 5056, 65, 0, 6144, 8, 0, 7488, 32768, 0, 8064, 1040, 0, 8064, 1040, 0, 8384, 18724, 0, 8384, 18724, 0, 8384, 18724, 0, 8384, 18724, 0, 8384, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581000202146783_796_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581000202146783_796_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2f835220 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581000202146783_796_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,298 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((27 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((153 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + for (uint i8 = 0; (i8 < 3); i8 = (i8 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((254 << 6) | (counter7 << 4)) | (i8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((273 << 6) | (counter7 << 4)) | (i8 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((284 << 6) | (counter7 << 4)) | (i8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i8 == 1)) { + continue; + } + if ((i8 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((297 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 744 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2880, 4097, 0, 2880, 4097, 0, 2884, 4097, 0, 2884, 4097, 0, 2896, 4097, 0, 2896, 4097, 0, 2900, 4097, 0, 2900, 4097, 0, 3584, 4097, 0, 3584, 4097, 0, 3588, 4097, 0, 3588, 4097, 0, 3600, 4097, 0, 3600, 4097, 0, 3604, 4097, 0, 3604, 4097, 0, 4736, 1, 0, 4752, 1, 0, 7104, 8738, 0, 7104, 8738, 0, 7104, 8738, 0, 7104, 8738, 0, 13328, 68, 0, 13328, 68, 0, 13344, 68, 0, 13344, 68, 0, 16272, 43690, 0, 16272, 43690, 0, 16272, 43690, 0, 16272, 43690, 0, 16272, 43690, 0, 16272, 43690, 0, 16272, 43690, 0, 16272, 43690, 0, 16276, 43690, 0, 16276, 43690, 0, 16276, 43690, 0, 16276, 43690, 0, 16276, 43690, 0, 16276, 43690, 0, 16276, 43690, 0, 16276, 43690, 0, 16280, 43690, 0, 16280, 43690, 0, 16280, 43690, 0, 16280, 43690, 0, 16280, 43690, 0, 16280, 43690, 0, 16280, 43690, 0, 16280, 43690, 0, 16288, 43690, 0, 16288, 43690, 0, 
16288, 43690, 0, 16288, 43690, 0, 16288, 43690, 0, 16288, 43690, 0, 16288, 43690, 0, 16288, 43690, 0, 16292, 43690, 0, 16292, 43690, 0, 16292, 43690, 0, 16292, 43690, 0, 16292, 43690, 0, 16292, 43690, 0, 16292, 43690, 0, 16292, 43690, 0, 16296, 43690, 0, 16296, 43690, 0, 16296, 43690, 0, 16296, 43690, 0, 16296, 43690, 0, 16296, 43690, 0, 16296, 43690, 0, 16296, 43690, 0, 18192, 43690, 0, 18192, 43690, 0, 18192, 43690, 0, 18192, 43690, 0, 18192, 43690, 0, 18192, 43690, 0, 18192, 43690, 0, 18192, 43690, 0, 18196, 43690, 0, 18196, 43690, 0, 18196, 43690, 0, 18196, 43690, 0, 18196, 43690, 0, 18196, 43690, 0, 18196, 43690, 0, 18196, 43690, 0, 18200, 43690, 0, 18200, 43690, 0, 18200, 43690, 0, 18200, 43690, 0, 18200, 43690, 0, 18200, 43690, 0, 18200, 43690, 0, 18200, 43690, 0, 18208, 43690, 0, 18208, 43690, 0, 18208, 43690, 0, 18208, 43690, 0, 18208, 43690, 0, 18208, 43690, 0, 18208, 43690, 0, 18208, 43690, 0, 18212, 43690, 0, 18212, 43690, 0, 18212, 43690, 0, 18212, 43690, 0, 18212, 43690, 0, 18212, 43690, 0, 18212, 43690, 0, 18212, 43690, 0, 18216, 43690, 0, 18216, 43690, 0, 18216, 43690, 0, 18216, 43690, 0, 18216, 43690, 0, 18216, 43690, 0, 18216, 43690, 0, 18216, 43690, 0, 19024, 16384, 0, 19040, 16384, 0, 2880, 4097, 0, 2880, 4097, 0, 2884, 4097, 0, 2884, 4097, 0, 2896, 4097, 0, 2896, 4097, 0, 2900, 4097, 0, 2900, 4097, 0, 3584, 4097, 0, 3584, 4097, 0, 3588, 4097, 0, 3588, 4097, 0, 3600, 4097, 0, 3600, 4097, 0, 3604, 4097, 0, 3604, 4097, 0, 4736, 1, 0, 4752, 1, 0, 7104, 8738, 0, 7104, 8738, 0, 7104, 8738, 0, 7104, 8738, 0, 13328, 68, 0, 13328, 68, 0, 13344, 68, 0, 13344, 68, 0, 16272, 43690, 0, 16272, 43690, 0, 16272, 43690, 0, 16272, 43690, 0, 16272, 43690, 0, 16272, 43690, 0, 16272, 43690, 0, 16272, 43690, 0, 16276, 43690, 0, 16276, 43690, 0, 16276, 43690, 0, 16276, 43690, 0, 16276, 43690, 0, 16276, 43690, 0, 16276, 43690, 0, 16276, 43690, 0, 16280, 43690, 0, 16280, 43690, 0, 16280, 43690, 0, 16280, 43690, 0, 16280, 43690, 0, 16280, 43690, 0, 16280, 43690, 0, 
16280, 43690, 0, 16288, 43690, 0, 16288, 43690, 0, 16288, 43690, 0, 16288, 43690, 0, 16288, 43690, 0, 16288, 43690, 0, 16288, 43690, 0, 16288, 43690, 0, 16292, 43690, 0, 16292, 43690, 0, 16292, 43690, 0, 16292, 43690, 0, 16292, 43690, 0, 16292, 43690, 0, 16292, 43690, 0, 16292, 43690, 0, 16296, 43690, 0, 16296, 43690, 0, 16296, 43690, 0, 16296, 43690, 0, 16296, 43690, 0, 16296, 43690, 0, 16296, 43690, 0, 16296, 43690, 0, 18192, 43690, 0, 18192, 43690, 0, 18192, 43690, 0, 18192, 43690, 0, 18192, 43690, 0, 18192, 43690, 0, 18192, 43690, 0, 18192, 43690, 0, 18196, 43690, 0, 18196, 43690, 0, 18196, 43690, 0, 18196, 43690, 0, 18196, 43690, 0, 18196, 43690, 0, 18196, 43690, 0, 18196, 43690, 0, 18200, 43690, 0, 18200, 43690, 0, 18200, 43690, 0, 18200, 43690, 0, 18200, 43690, 0, 18200, 43690, 0, 18200, 43690, 0, 18200, 43690, 0, 18208, 43690, 0, 18208, 43690, 0, 18208, 43690, 0, 18208, 43690, 0, 18208, 43690, 0, 18208, 43690, 0, 18208, 43690, 0, 18208, 43690, 0, 18212, 43690, 0, 18212, 43690, 0, 18212, 43690, 0, 18212, 43690, 0, 18212, 43690, 0, 18212, 43690, 0, 18212, 43690, 0, 18212, 43690, 0, 18216, 43690, 0, 18216, 43690, 0, 18216, 43690, 0, 18216, 43690, 0, 18216, 43690, 0, 18216, 43690, 0, 18216, 43690, 0, 18216, 43690, 0, 19024, 16384, 0, 19040, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581104256517942_797_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581104256517942_797_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..910a9b6c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581104256517942_797_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,235 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while 
((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 
1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((147 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((156 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + if ((i3 == 1)) { + break; + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (((209 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((226 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2064, 32768, 0, 2080, 32768, 0, 2096, 32768, 0, 5264, 4, 0, 5280, 4, 0, 5968, 16388, 0, 5968, 16388, 0, 5984, 16388, 0, 5984, 16388, 0, 6592, 73, 0, 6592, 73, 0, 6592, 73, 0, 9408, 1040, 0, 9408, 1040, 0, 9412, 1040, 0, 9412, 1040, 0, 9424, 1040, 0, 9424, 1040, 0, 9428, 1040, 0, 9428, 1040, 0, 9984, 1040, 0, 9984, 1040, 0, 9988, 1040, 0, 9988, 1040, 0, 10000, 1040, 0, 10000, 1040, 0, 10004, 1040, 0, 10004, 1040, 0, 12352, 18432, 0, 12352, 18432, 0, 12368, 18432, 0, 12368, 18432, 0, 12384, 18432, 0, 12384, 18432, 0, 13380, 256, 0, 13384, 256, 0, 13388, 256, 0, 13396, 256, 0, 13400, 256, 0, 13404, 256, 0, 13412, 256, 0, 13416, 256, 0, 13420, 256, 0, 2064, 32768, 0, 2080, 32768, 0, 2096, 32768, 0, 5264, 4, 0, 5280, 4, 0, 5968, 16388, 0, 5968, 16388, 0, 5984, 16388, 0, 5984, 16388, 0, 6592, 73, 0, 6592, 73, 0, 6592, 73, 0, 9408, 1040, 0, 9408, 1040, 0, 9412, 1040, 0, 9412, 1040, 0, 9424, 1040, 0, 9424, 1040, 0, 9428, 1040, 0, 9428, 1040, 0, 9984, 1040, 0, 9984, 1040, 0, 9988, 1040, 0, 9988, 1040, 0, 10000, 1040, 0, 10000, 1040, 0, 10004, 1040, 0, 10004, 1040, 0, 
12352, 18432, 0, 12352, 18432, 0, 12368, 18432, 0, 12368, 18432, 0, 12384, 18432, 0, 12384, 18432, 0, 13380, 256, 0, 13384, 256, 0, 13388, 256, 0, 13396, 256, 0, 13400, 256, 0, 13404, 256, 0, 13412, 256, 0, 13416, 256, 0, 13420, 256, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581403773791759_799_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581403773791759_799_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d05a2027 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581403773791759_799_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,226 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || 
(WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((68 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 
3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((152 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((178 << 6) | (i2 << 4)) | (i3 << 2)) | counter4); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((189 << 6) | (i2 << 4)) | (i3 << 2)) | counter4); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 1)) { + continue; + } + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 600 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2688, 4640, 0, 2688, 4640, 0, 2688, 4640, 0, 2704, 4640, 0, 2704, 4640, 0, 2704, 
4640, 0, 4356, 272, 0, 4356, 272, 0, 4360, 272, 0, 4360, 272, 0, 4364, 272, 0, 4364, 272, 0, 4372, 272, 0, 4372, 272, 0, 4376, 272, 0, 4376, 272, 0, 4380, 272, 0, 4380, 272, 0, 5060, 16, 0, 5064, 16, 0, 5068, 16, 0, 5076, 16, 0, 5080, 16, 0, 5084, 16, 0, 6016, 512, 0, 6032, 512, 0, 6336, 30583, 0, 6336, 30583, 0, 6336, 30583, 0, 6336, 30583, 0, 6336, 30583, 0, 6336, 30583, 0, 6336, 30583, 0, 6336, 30583, 0, 6336, 30583, 0, 6336, 30583, 0, 6336, 30583, 0, 6336, 30583, 0, 6784, 34952, 0, 6784, 34952, 0, 6784, 34952, 0, 6784, 34952, 0, 7872, 21845, 0, 7872, 21845, 0, 7872, 21845, 0, 7872, 21845, 0, 7872, 21845, 0, 7872, 21845, 0, 7872, 21845, 0, 7872, 21845, 0, 7888, 21845, 0, 7888, 21845, 0, 7888, 21845, 0, 7888, 21845, 0, 7888, 21845, 0, 7888, 21845, 0, 7888, 21845, 0, 7888, 21845, 0, 9728, 512, 0, 9732, 512, 0, 9736, 512, 0, 9744, 512, 0, 9748, 512, 0, 9752, 512, 0, 13120, 32771, 0, 13120, 32771, 0, 13120, 32771, 0, 13136, 32771, 0, 13136, 32771, 0, 13136, 32771, 0, 14272, 49167, 0, 14272, 49167, 0, 14272, 49167, 0, 14272, 49167, 0, 14272, 49167, 0, 14272, 49167, 0, 14288, 49167, 0, 14288, 49167, 0, 14288, 49167, 0, 14288, 49167, 0, 14288, 49167, 0, 14288, 49167, 0, 14848, 43690, 0, 14848, 43690, 0, 14848, 43690, 0, 14848, 43690, 0, 14848, 43690, 0, 14848, 43690, 0, 14848, 43690, 0, 14848, 43690, 0, 14864, 43690, 0, 14864, 43690, 0, 14864, 43690, 0, 14864, 43690, 0, 14864, 43690, 0, 14864, 43690, 0, 14864, 43690, 0, 14864, 43690, 0, 576, 17, 0, 576, 17, 0, 2688, 4640, 0, 2688, 4640, 0, 2688, 4640, 0, 2704, 4640, 0, 2704, 4640, 0, 2704, 4640, 0, 4356, 272, 0, 4356, 272, 0, 4360, 272, 0, 4360, 272, 0, 4364, 272, 0, 4364, 272, 0, 4372, 272, 0, 4372, 272, 0, 4376, 272, 0, 4376, 272, 0, 4380, 272, 0, 4380, 272, 0, 5060, 16, 0, 5064, 16, 0, 5068, 16, 0, 5076, 16, 0, 5080, 16, 0, 5084, 16, 0, 6016, 512, 0, 6032, 512, 0, 6336, 30583, 0, 6336, 30583, 0, 6336, 30583, 0, 6336, 30583, 0, 6336, 30583, 0, 6336, 30583, 0, 6336, 30583, 0, 6336, 30583, 0, 6336, 30583, 0, 6336, 
30583, 0, 6336, 30583, 0, 6336, 30583, 0, 6784, 34952, 0, 6784, 34952, 0, 6784, 34952, 0, 6784, 34952, 0, 7872, 21845, 0, 7872, 21845, 0, 7872, 21845, 0, 7872, 21845, 0, 7872, 21845, 0, 7872, 21845, 0, 7872, 21845, 0, 7872, 21845, 0, 7888, 21845, 0, 7888, 21845, 0, 7888, 21845, 0, 7888, 21845, 0, 7888, 21845, 0, 7888, 21845, 0, 7888, 21845, 0, 7888, 21845, 0, 9728, 512, 0, 9732, 512, 0, 9736, 512, 0, 9744, 512, 0, 9748, 512, 0, 9752, 512, 0, 13120, 32771, 0, 13120, 32771, 0, 13120, 32771, 0, 13136, 32771, 0, 13136, 32771, 0, 13136, 32771, 0, 14272, 49167, 0, 14272, 49167, 0, 14272, 49167, 0, 14272, 49167, 0, 14272, 49167, 0, 14272, 49167, 0, 14288, 49167, 0, 14288, 49167, 0, 14288, 49167, 0, 14288, 49167, 0, 14288, 49167, 0, 14288, 49167, 0, 14848, 43690, 0, 14848, 43690, 0, 14848, 43690, 0, 14848, 43690, 0, 14848, 43690, 0, 14848, 43690, 0, 14848, 43690, 0, 14848, 43690, 0, 14864, 43690, 0, 14864, 43690, 0, 14864, 43690, 0, 14864, 43690, 0, 14864, 43690, 0, 14864, 43690, 0, 14864, 43690, 0, 14864, 43690, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581408613366883_800_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581408613366883_800_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a6514813 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581408613366883_800_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,412 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || 
(WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((318 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((336 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((343 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((354 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (367 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((383 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((413 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((424 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (433 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (443 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((461 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((475 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((482 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter7 == 1)) { + break; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (492 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 174 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3392, 4113, 0, 3392, 4113, 0, 3392, 4113, 0, 13440, 17408, 0, 13440, 17408, 0, 16704, 4, 0, 16720, 4, 0, 17280, 4, 0, 17296, 4, 0, 18752, 4, 0, 19200, 16384, 0, 20368, 34816, 0, 20368, 34816, 0, 20384, 34816, 0, 20384, 34816, 0, 21968, 8, 0, 21972, 8, 0, 21984, 8, 0, 21988, 8, 0, 22672, 34816, 0, 22672, 34816, 0, 22688, 34816, 0, 22688, 34816, 0, 30420, 4, 0, 30424, 4, 0, 31488, 34952, 0, 31488, 34952, 0, 31488, 34952, 0, 31488, 34952, 0, 3392, 4113, 0, 3392, 4113, 0, 3392, 4113, 0, 
13440, 17408, 0, 13440, 17408, 0, 16704, 4, 0, 16720, 4, 0, 17280, 4, 0, 17296, 4, 0, 18752, 4, 0, 19200, 16384, 0, 20368, 34816, 0, 20368, 34816, 0, 20384, 34816, 0, 20384, 34816, 0, 21968, 8, 0, 21972, 8, 0, 21984, 8, 0, 21988, 8, 0, 22672, 34816, 0, 22672, 34816, 0, 22688, 34816, 0, 22688, 34816, 0, 30420, 4, 0, 30424, 4, 0, 31488, 34952, 0, 31488, 34952, 0, 31488, 34952, 0, 31488, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581412551180726_801_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581412551180726_801_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..decf5c67 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581412551180726_801_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,216 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((75 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 294 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 32768, 0, 1808, 32768, 0, 1824, 32768, 0, 6208, 32768, 0, 7680, 18498, 0, 7680, 18498, 0, 7680, 18498, 0, 7680, 
18498, 0, 9024, 5, 0, 9024, 5, 0, 10688, 10922, 0, 10688, 10922, 0, 10688, 10922, 0, 10688, 10922, 0, 10688, 10922, 0, 10688, 10922, 0, 10688, 10922, 0, 10704, 10922, 0, 10704, 10922, 0, 10704, 10922, 0, 10704, 10922, 0, 10704, 10922, 0, 10704, 10922, 0, 10704, 10922, 0, 10720, 10922, 0, 10720, 10922, 0, 10720, 10922, 0, 10720, 10922, 0, 10720, 10922, 0, 10720, 10922, 0, 10720, 10922, 0, 12160, 18561, 0, 12160, 18561, 0, 12160, 18561, 0, 12160, 18561, 0, 14976, 61471, 0, 14976, 61471, 0, 14976, 61471, 0, 14976, 61471, 0, 14976, 61471, 0, 14976, 61471, 0, 14976, 61471, 0, 14976, 61471, 0, 14976, 61471, 0, 14720, 2720, 0, 14720, 2720, 0, 14720, 2720, 0, 14720, 2720, 0, 14464, 1024, 0, 1792, 32768, 0, 1808, 32768, 0, 1824, 32768, 0, 6208, 32768, 0, 7680, 18498, 0, 7680, 18498, 0, 7680, 18498, 0, 7680, 18498, 0, 9024, 5, 0, 9024, 5, 0, 10688, 10922, 0, 10688, 10922, 0, 10688, 10922, 0, 10688, 10922, 0, 10688, 10922, 0, 10688, 10922, 0, 10688, 10922, 0, 10704, 10922, 0, 10704, 10922, 0, 10704, 10922, 0, 10704, 10922, 0, 10704, 10922, 0, 10704, 10922, 0, 10704, 10922, 0, 10720, 10922, 0, 10720, 10922, 0, 10720, 10922, 0, 10720, 10922, 0, 10720, 10922, 0, 10720, 10922, 0, 10720, 10922, 0, 12160, 18561, 0, 12160, 18561, 0, 12160, 18561, 0, 12160, 18561, 0, 14976, 61471, 0, 14976, 61471, 0, 14976, 61471, 0, 14976, 61471, 0, 14976, 61471, 0, 14976, 61471, 0, 14976, 61471, 0, 14976, 61471, 0, 14976, 61471, 0, 14720, 2720, 0, 14720, 2720, 0, 14720, 2720, 0, 14720, 2720, 0, 14464, 1024, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + 
Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581445878213670_803_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581445878213670_803_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..48d36a64 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581445878213670_803_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,275 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((62 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 9))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + 
result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (292 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (286 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if 
(((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2944, 256, 0, 3984, 16, 0, 4000, 16, 0, 10112, 32, 0, 14592, 34952, 0, 14592, 34952, 0, 14592, 34952, 0, 14592, 34952, 0, 18688, 17473, 0, 18688, 17473, 0, 18688, 17473, 0, 18688, 17473, 0, 18304, 16, 0, 17920, 8456, 0, 17920, 8456, 0, 17920, 8456, 0, 17664, 4096, 0, 2944, 256, 0, 3984, 16, 0, 4000, 16, 0, 10112, 32, 0, 14592, 34952, 0, 14592, 34952, 0, 14592, 34952, 0, 14592, 34952, 0, 18688, 17473, 0, 18688, 17473, 0, 18688, 17473, 0, 18688, 17473, 0, 18304, 16, 0, 17920, 8456, 0, 17920, 8456, 0, 17920, 8456, 0, 17664, 4096, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581446861121583_804_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581446861121583_804_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b2a58b21 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581446861121583_804_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,204 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 
1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i1 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 6))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 21504, 0, 1088, 21504, 0, 1088, 21504, 0, 1104, 21504, 0, 1104, 21504, 0, 1104, 21504, 0, 1120, 21504, 0, 1120, 21504, 0, 1120, 21504, 0, 2240, 20481, 0, 2240, 20481, 0, 2240, 20481, 0, 2256, 20481, 0, 
2256, 20481, 0, 2256, 20481, 0, 2272, 20481, 0, 2272, 20481, 0, 2272, 20481, 0, 2880, 1, 0, 2896, 1, 0, 2912, 1, 0, 3776, 16388, 0, 3776, 16388, 0, 3792, 16388, 0, 3792, 16388, 0, 3808, 16388, 0, 3808, 16388, 0, 1088, 21504, 0, 1088, 21504, 0, 1088, 21504, 0, 1104, 21504, 0, 1104, 21504, 0, 1104, 21504, 0, 1120, 21504, 0, 1120, 21504, 0, 1120, 21504, 0, 2240, 20481, 0, 2240, 20481, 0, 2240, 20481, 0, 2256, 20481, 0, 2256, 20481, 0, 2256, 20481, 0, 2272, 20481, 0, 2272, 20481, 0, 2272, 20481, 0, 2880, 1, 0, 2896, 1, 0, 2912, 1, 0, 3776, 16388, 0, 3776, 16388, 0, 3792, 16388, 0, 3792, 16388, 0, 3808, 16388, 0, 3808, 16388, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581448296322615_805_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581448296322615_805_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2838deae --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581448296322615_805_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,179 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) 
|| (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 11)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 3712, 1024, 0, 3728, 1024, 0, 3744, 1024, 0, 4544, 18724, 0, 4544, 18724, 0, 4544, 18724, 0, 4544, 18724, 0, 4544, 18724, 0, 5184, 85, 0, 5184, 85, 0, 5184, 85, 0, 5184, 85, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 3712, 1024, 0, 3728, 1024, 0, 3744, 1024, 0, 4544, 18724, 0, 4544, 18724, 0, 4544, 18724, 0, 4544, 18724, 0, 4544, 18724, 0, 5184, 85, 0, 5184, 85, 0, 5184, 85, 0, 5184, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581448708957473_806_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581448708957473_806_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f26e4cbb --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581448708957473_806_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,126 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((84 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 846 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 4, 0, 912, 4, 0, 928, 4, 0, 2308, 34848, 0, 2308, 34848, 0, 2308, 34848, 0, 2312, 34848, 0, 2312, 34848, 0, 2312, 34848, 0, 2316, 34848, 0, 2316, 34848, 0, 2316, 34848, 0, 2324, 34848, 0, 2324, 34848, 0, 2324, 34848, 0, 2328, 34848, 0, 2328, 34848, 0, 2328, 34848, 0, 2332, 34848, 0, 2332, 34848, 0, 2332, 34848, 0, 2340, 34848, 0, 2340, 34848, 0, 2340, 34848, 0, 2344, 34848, 0, 2344, 34848, 0, 2344, 
34848, 0, 2348, 34848, 0, 2348, 34848, 0, 2348, 34848, 0, 3460, 63503, 0, 3460, 63503, 0, 3460, 63503, 0, 3460, 63503, 0, 3460, 63503, 0, 3460, 63503, 0, 3460, 63503, 0, 3460, 63503, 0, 3460, 63503, 0, 3464, 63503, 0, 3464, 63503, 0, 3464, 63503, 0, 3464, 63503, 0, 3464, 63503, 0, 3464, 63503, 0, 3464, 63503, 0, 3464, 63503, 0, 3464, 63503, 0, 3468, 63503, 0, 3468, 63503, 0, 3468, 63503, 0, 3468, 63503, 0, 3468, 63503, 0, 3468, 63503, 0, 3468, 63503, 0, 3468, 63503, 0, 3468, 63503, 0, 3476, 63503, 0, 3476, 63503, 0, 3476, 63503, 0, 3476, 63503, 0, 3476, 63503, 0, 3476, 63503, 0, 3476, 63503, 0, 3476, 63503, 0, 3476, 63503, 0, 3480, 63503, 0, 3480, 63503, 0, 3480, 63503, 0, 3480, 63503, 0, 3480, 63503, 0, 3480, 63503, 0, 3480, 63503, 0, 3480, 63503, 0, 3480, 63503, 0, 3484, 63503, 0, 3484, 63503, 0, 3484, 63503, 0, 3484, 63503, 0, 3484, 63503, 0, 3484, 63503, 0, 3484, 63503, 0, 3484, 63503, 0, 3484, 63503, 0, 3492, 63503, 0, 3492, 63503, 0, 3492, 63503, 0, 3492, 63503, 0, 3492, 63503, 0, 3492, 63503, 0, 3492, 63503, 0, 3492, 63503, 0, 3492, 63503, 0, 3496, 63503, 0, 3496, 63503, 0, 3496, 63503, 0, 3496, 63503, 0, 3496, 63503, 0, 3496, 63503, 0, 3496, 63503, 0, 3496, 63503, 0, 3496, 63503, 0, 3500, 63503, 0, 3500, 63503, 0, 3500, 63503, 0, 3500, 63503, 0, 3500, 63503, 0, 3500, 63503, 0, 3500, 63503, 0, 3500, 63503, 0, 3500, 63503, 0, 5380, 518, 0, 5380, 518, 0, 5380, 518, 0, 5384, 518, 0, 5384, 518, 0, 5384, 518, 0, 5388, 518, 0, 5388, 518, 0, 5388, 518, 0, 5396, 518, 0, 5396, 518, 0, 5396, 518, 0, 5400, 518, 0, 5400, 518, 0, 5400, 518, 0, 5404, 518, 0, 5404, 518, 0, 5404, 518, 0, 5412, 518, 0, 5412, 518, 0, 5412, 518, 0, 5416, 518, 0, 5416, 518, 0, 5416, 518, 0, 5420, 518, 0, 5420, 518, 0, 5420, 518, 0, 5824, 128, 0, 5840, 128, 0, 5856, 128, 0, 896, 4, 0, 912, 4, 0, 928, 4, 0, 2308, 34848, 0, 2308, 34848, 0, 2308, 34848, 0, 2312, 34848, 0, 2312, 34848, 0, 2312, 34848, 0, 2316, 34848, 0, 2316, 34848, 0, 2316, 34848, 0, 2324, 34848, 0, 2324, 34848, 0, 2324, 34848, 0, 
2328, 34848, 0, 2328, 34848, 0, 2328, 34848, 0, 2332, 34848, 0, 2332, 34848, 0, 2332, 34848, 0, 2340, 34848, 0, 2340, 34848, 0, 2340, 34848, 0, 2344, 34848, 0, 2344, 34848, 0, 2344, 34848, 0, 2348, 34848, 0, 2348, 34848, 0, 2348, 34848, 0, 3460, 63503, 0, 3460, 63503, 0, 3460, 63503, 0, 3460, 63503, 0, 3460, 63503, 0, 3460, 63503, 0, 3460, 63503, 0, 3460, 63503, 0, 3460, 63503, 0, 3464, 63503, 0, 3464, 63503, 0, 3464, 63503, 0, 3464, 63503, 0, 3464, 63503, 0, 3464, 63503, 0, 3464, 63503, 0, 3464, 63503, 0, 3464, 63503, 0, 3468, 63503, 0, 3468, 63503, 0, 3468, 63503, 0, 3468, 63503, 0, 3468, 63503, 0, 3468, 63503, 0, 3468, 63503, 0, 3468, 63503, 0, 3468, 63503, 0, 3476, 63503, 0, 3476, 63503, 0, 3476, 63503, 0, 3476, 63503, 0, 3476, 63503, 0, 3476, 63503, 0, 3476, 63503, 0, 3476, 63503, 0, 3476, 63503, 0, 3480, 63503, 0, 3480, 63503, 0, 3480, 63503, 0, 3480, 63503, 0, 3480, 63503, 0, 3480, 63503, 0, 3480, 63503, 0, 3480, 63503, 0, 3480, 63503, 0, 3484, 63503, 0, 3484, 63503, 0, 3484, 63503, 0, 3484, 63503, 0, 3484, 63503, 0, 3484, 63503, 0, 3484, 63503, 0, 3484, 63503, 0, 3484, 63503, 0, 3492, 63503, 0, 3492, 63503, 0, 3492, 63503, 0, 3492, 63503, 0, 3492, 63503, 0, 3492, 63503, 0, 3492, 63503, 0, 3492, 63503, 0, 3492, 63503, 0, 3496, 63503, 0, 3496, 63503, 0, 3496, 63503, 0, 3496, 63503, 0, 3496, 63503, 0, 3496, 63503, 0, 3496, 63503, 0, 3496, 63503, 0, 3496, 63503, 0, 3500, 63503, 0, 3500, 63503, 0, 3500, 63503, 0, 3500, 63503, 0, 3500, 63503, 0, 3500, 63503, 0, 3500, 63503, 0, 3500, 63503, 0, 3500, 63503, 0, 5380, 518, 0, 5380, 518, 0, 5380, 518, 0, 5384, 518, 0, 5384, 518, 0, 5384, 518, 0, 5388, 518, 0, 5388, 518, 0, 5388, 518, 0, 5396, 518, 0, 5396, 518, 0, 5396, 518, 0, 5400, 518, 0, 5400, 518, 0, 5400, 518, 0, 5404, 518, 0, 5404, 518, 0, 5404, 518, 0, 5412, 518, 0, 5412, 518, 0, 5412, 518, 0, 5416, 518, 0, 5416, 518, 0, 5416, 518, 0, 5420, 518, 0, 5420, 518, 0, 5420, 518, 0, 5824, 128, 0, 5840, 128, 0, 5856, 128, 0] + - Name: _wave_op_index + Format: UInt32 + 
Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581494323355636_808_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581494323355636_808_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0d40d425 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581494323355636_808_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,415 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((124 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { 
+ result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((266 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((273 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((292 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((307 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (341 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (352 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (369 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (387 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (396 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (401 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 270 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1728, 2, 0, 4288, 8194, 0, 4288, 8194, 0, 6016, 64, 0, 6032, 64, 0, 10880, 17476, 0, 10880, 17476, 0, 10880, 17476, 0, 10880, 17476, 0, 10896, 17476, 0, 10896, 17476, 0, 10896, 17476, 0, 10896, 17476, 0, 11328, 17476, 0, 11328, 17476, 0, 11328, 17476, 0, 11328, 17476, 0, 11344, 17476, 0, 11344, 17476, 0, 11344, 17476, 0, 11344, 17476, 0, 13760, 4, 0, 13776, 4, 0, 14208, 34952, 0, 14208, 34952, 0, 14208, 34952, 0, 14208, 34952, 0, 15104, 17, 0, 15104, 17, 0, 16400, 8192, 
0, 16416, 8192, 0, 16432, 8192, 0, 17488, 2, 0, 17504, 2, 0, 17520, 2, 0, 18704, 512, 0, 18720, 512, 0, 18736, 512, 0, 19968, 17476, 0, 19968, 17476, 0, 19968, 17476, 0, 19968, 17476, 0, 25664, 2048, 0, 576, 17, 0, 576, 17, 0, 1728, 2, 0, 4288, 8194, 0, 4288, 8194, 0, 6016, 64, 0, 6032, 64, 0, 10880, 17476, 0, 10880, 17476, 0, 10880, 17476, 0, 10880, 17476, 0, 10896, 17476, 0, 10896, 17476, 0, 10896, 17476, 0, 10896, 17476, 0, 11328, 17476, 0, 11328, 17476, 0, 11328, 17476, 0, 11328, 17476, 0, 11344, 17476, 0, 11344, 17476, 0, 11344, 17476, 0, 11344, 17476, 0, 13760, 4, 0, 13776, 4, 0, 14208, 34952, 0, 14208, 34952, 0, 14208, 34952, 0, 14208, 34952, 0, 15104, 17, 0, 15104, 17, 0, 16400, 8192, 0, 16416, 8192, 0, 16432, 8192, 0, 17488, 2, 0, 17504, 2, 0, 17520, 2, 0, 18704, 512, 0, 18720, 512, 0, 18736, 512, 0, 19968, 17476, 0, 19968, 17476, 0, 19968, 17476, 0, 19968, 17476, 0, 25664, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581502504778272_809_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581502504778272_809_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..35c7d862 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581502504778272_809_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,163 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + result = (result + 
WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((141 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((154 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4096, 18456, 0, 4096, 18456, 0, 4096, 18456, 0, 4096, 18456, 0, 3456, 46087, 0, 3456, 46087, 0, 3456, 46087, 0, 3456, 46087, 0, 3456, 46087, 0, 3456, 46087, 0, 3456, 46087, 0, 3072, 32, 0, 2816, 192, 0, 2816, 192, 0, 5264, 32769, 0, 5264, 32769, 0, 5280, 32769, 0, 5280, 32769, 0, 6672, 1024, 0, 6688, 1024, 0, 9044, 1024, 0, 9048, 1024, 0, 9060, 1024, 0, 9064, 1024, 0, 9876, 1028, 0, 9876, 1028, 0, 9880, 1028, 0, 9880, 1028, 0, 9892, 1028, 0, 9892, 1028, 0, 9896, 1028, 0, 9896, 1028, 0, 4096, 18456, 0, 4096, 18456, 0, 4096, 18456, 0, 4096, 18456, 0, 3456, 46087, 0, 3456, 46087, 0, 3456, 46087, 0, 3456, 46087, 0, 3456, 46087, 0, 3456, 46087, 0, 3456, 46087, 0, 3072, 32, 0, 2816, 192, 0, 2816, 192, 0, 5264, 32769, 0, 5264, 32769, 0, 5280, 32769, 0, 
5280, 32769, 0, 6672, 1024, 0, 6688, 1024, 0, 9044, 1024, 0, 9048, 1024, 0, 9060, 1024, 0, 9064, 1024, 0, 9876, 1028, 0, 9876, 1028, 0, 9880, 1028, 0, 9880, 1028, 0, 9892, 1028, 0, 9892, 1028, 0, 9896, 1028, 0, 9896, 1028, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581505576461847_811_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581505576461847_811_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1011fd84 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581505576461847_811_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,235 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = 
(i1 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((206 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 240 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [8016, 1, 0, 8032, 1, 0, 8848, 1, 0, 8864, 1, 0, 9552, 12291, 0, 9552, 12291, 0, 9552, 12291, 0, 9552, 12291, 0, 9568, 12291, 0, 9568, 
12291, 0, 9568, 12291, 0, 9568, 12291, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 10304, 34952, 0, 10304, 34952, 0, 10304, 34952, 0, 10304, 34952, 0, 11984, 40968, 0, 11984, 40968, 0, 11984, 40968, 0, 12000, 40968, 0, 12000, 40968, 0, 12000, 40968, 0, 14160, 4360, 0, 14160, 4360, 0, 14160, 4360, 0, 14176, 4360, 0, 14176, 4360, 0, 14176, 4360, 0, 8016, 1, 0, 8032, 1, 0, 8848, 1, 0, 8864, 1, 0, 9552, 12291, 0, 9552, 12291, 0, 9552, 12291, 0, 9552, 12291, 0, 9568, 12291, 0, 9568, 12291, 0, 9568, 12291, 0, 9568, 12291, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 9856, 30583, 0, 10304, 34952, 0, 10304, 34952, 0, 10304, 34952, 0, 10304, 34952, 0, 11984, 40968, 0, 11984, 40968, 0, 11984, 40968, 0, 12000, 40968, 0, 12000, 40968, 0, 12000, 40968, 0, 14160, 4360, 0, 14160, 4360, 0, 14160, 4360, 0, 14176, 4360, 0, 14176, 4360, 0, 14176, 4360, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581506833394782_812_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581506833394782_812_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..57f1768a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581506833394782_812_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,128 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((73 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2560, 16386, 0, 2560, 16386, 0, 2176, 128, 0, 1920, 9281, 0, 1920, 9281, 0, 1920, 9281, 0, 1920, 9281, 0, 3200, 85, 0, 3200, 85, 0, 3200, 85, 0, 3200, 85, 0, 4672, 170, 0, 4672, 170, 0, 4672, 170, 0, 4672, 170, 0, 4676, 170, 0, 4676, 170, 0, 4676, 170, 0, 4676, 170, 0, 4688, 170, 0, 4688, 170, 0, 4688, 170, 0, 4688, 170, 0, 4692, 170, 0, 4692, 170, 0, 4692, 170, 0, 4692, 170, 0, 2560, 16386, 0, 2560, 16386, 0, 2176, 128, 0, 1920, 9281, 0, 1920, 9281, 0, 1920, 9281, 0, 1920, 9281, 0, 3200, 85, 0, 3200, 85, 0, 
3200, 85, 0, 3200, 85, 0, 4672, 170, 0, 4672, 170, 0, 4672, 170, 0, 4672, 170, 0, 4676, 170, 0, 4676, 170, 0, 4676, 170, 0, 4676, 170, 0, 4688, 170, 0, 4688, 170, 0, 4688, 170, 0, 4688, 170, 0, 4692, 170, 0, 4692, 170, 0, 4692, 170, 0, 4692, 170, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581626109754009_816_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581626109754009_816_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2bd606ee --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581626109754009_816_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,220 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 174 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2880, 1, 0, 2896, 1, 0, 2912, 1, 0, 3712, 1, 0, 3728, 1, 0, 3744, 1, 0, 5696, 4369, 0, 5696, 4369, 0, 5696, 4369, 0, 5696, 4369, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6784, 32768, 0, 9344, 32768, 0, 9360, 32768, 0, 9376, 32768, 0, 10496, 2184, 0, 10496, 2184, 0, 10496, 2184, 0, 2880, 1, 0, 2896, 1, 0, 2912, 1, 0, 3712, 1, 0, 3728, 1, 0, 3744, 1, 0, 5696, 4369, 0, 
5696, 4369, 0, 5696, 4369, 0, 5696, 4369, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6016, 30583, 0, 6784, 32768, 0, 9344, 32768, 0, 9360, 32768, 0, 9376, 32768, 0, 10496, 2184, 0, 10496, 2184, 0, 10496, 2184, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581626867803680_817_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581626867803680_817_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..09361667 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581626867803680_817_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,238 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 
1: { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 
9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((216 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1984, 1024, 0, 7232, 2, 0, 9360, 16, 0, 9364, 16, 0, 9368, 16, 0, 9376, 16, 0, 9380, 16, 0, 9384, 16, 0, 9808, 2, 0, 9824, 2, 0, 10240, 8192, 0, 10560, 18724, 0, 10560, 18724, 0, 10560, 18724, 0, 10560, 18724, 0, 10560, 18724, 0, 11920, 32768, 0, 11936, 32768, 0, 13844, 34824, 0, 13844, 34824, 0, 13844, 34824, 0, 13848, 34824, 0, 13848, 34824, 0, 13848, 34824, 0, 13852, 34824, 0, 13852, 34824, 0, 13852, 34824, 0, 13860, 34824, 0, 13860, 34824, 0, 13860, 34824, 0, 13864, 34824, 0, 13864, 34824, 0, 13864, 34824, 0, 13868, 34824, 0, 13868, 34824, 0, 13868, 34824, 0, 14288, 16, 0, 14304, 16, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1984, 1024, 0, 7232, 2, 0, 9360, 16, 0, 9364, 16, 0, 9368, 16, 0, 9376, 16, 0, 9380, 16, 0, 9384, 16, 0, 9808, 2, 0, 9824, 2, 0, 10240, 8192, 0, 10560, 18724, 0, 10560, 18724, 0, 10560, 18724, 0, 10560, 18724, 0, 10560, 18724, 0, 11920, 32768, 0, 11936, 32768, 0, 13844, 34824, 0, 13844, 34824, 0, 13844, 34824, 0, 13848, 34824, 0, 13848, 34824, 0, 13848, 34824, 0, 13852, 34824, 0, 13852, 34824, 0, 13852, 34824, 0, 13860, 34824, 0, 13860, 34824, 0, 13860, 34824, 0, 13864, 34824, 0, 13864, 34824, 0, 13864, 34824, 0, 13868, 34824, 0, 13868, 34824, 0, 13868, 34824, 0, 14288, 16, 0, 14304, 16, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581663851749850_821_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581663851749850_821_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..50e8afd4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581663851749850_821_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,180 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + if ((i0 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 450 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 32769, 0, 1280, 32769, 0, 1296, 32769, 0, 1296, 32769, 0, 1312, 32769, 0, 1312, 32769, 0, 2560, 49155, 0, 2560, 49155, 0, 2560, 49155, 0, 2560, 49155, 0, 2576, 49155, 0, 2576, 49155, 0, 2576, 49155, 0, 2576, 49155, 0, 2592, 49155, 0, 2592, 49155, 0, 2592, 49155, 0, 2592, 49155, 0, 3904, 36865, 0, 3904, 36865, 0, 3904, 36865, 0, 3920, 36865, 0, 3920, 36865, 0, 3920, 36865, 0, 3936, 36865, 0, 3936, 36865, 0, 3936, 36865, 0, 7808, 32771, 0, 7808, 32771, 0, 7808, 32771, 0, 7824, 32771, 0, 7824, 32771, 0, 7824, 32771, 0, 7840, 
32771, 0, 7840, 32771, 0, 7840, 32771, 0, 8512, 32771, 0, 8512, 32771, 0, 8512, 32771, 0, 8528, 32771, 0, 8528, 32771, 0, 8528, 32771, 0, 8544, 32771, 0, 8544, 32771, 0, 8544, 32771, 0, 8960, 31744, 0, 8960, 31744, 0, 8960, 31744, 0, 8960, 31744, 0, 8960, 31744, 0, 8976, 31744, 0, 8976, 31744, 0, 8976, 31744, 0, 8976, 31744, 0, 8976, 31744, 0, 8992, 31744, 0, 8992, 31744, 0, 8992, 31744, 0, 8992, 31744, 0, 8992, 31744, 0, 9408, 31744, 0, 9408, 31744, 0, 9408, 31744, 0, 9408, 31744, 0, 9408, 31744, 0, 9424, 31744, 0, 9424, 31744, 0, 9424, 31744, 0, 9424, 31744, 0, 9424, 31744, 0, 9440, 31744, 0, 9440, 31744, 0, 9440, 31744, 0, 9440, 31744, 0, 9440, 31744, 0, 1280, 32769, 0, 1280, 32769, 0, 1296, 32769, 0, 1296, 32769, 0, 1312, 32769, 0, 1312, 32769, 0, 2560, 49155, 0, 2560, 49155, 0, 2560, 49155, 0, 2560, 49155, 0, 2576, 49155, 0, 2576, 49155, 0, 2576, 49155, 0, 2576, 49155, 0, 2592, 49155, 0, 2592, 49155, 0, 2592, 49155, 0, 2592, 49155, 0, 3904, 36865, 0, 3904, 36865, 0, 3904, 36865, 0, 3920, 36865, 0, 3920, 36865, 0, 3920, 36865, 0, 3936, 36865, 0, 3936, 36865, 0, 3936, 36865, 0, 7808, 32771, 0, 7808, 32771, 0, 7808, 32771, 0, 7824, 32771, 0, 7824, 32771, 0, 7824, 32771, 0, 7840, 32771, 0, 7840, 32771, 0, 7840, 32771, 0, 8512, 32771, 0, 8512, 32771, 0, 8512, 32771, 0, 8528, 32771, 0, 8528, 32771, 0, 8528, 32771, 0, 8544, 32771, 0, 8544, 32771, 0, 8544, 32771, 0, 8960, 31744, 0, 8960, 31744, 0, 8960, 31744, 0, 8960, 31744, 0, 8960, 31744, 0, 8976, 31744, 0, 8976, 31744, 0, 8976, 31744, 0, 8976, 31744, 0, 8976, 31744, 0, 8992, 31744, 0, 8992, 31744, 0, 8992, 31744, 0, 8992, 31744, 0, 8992, 31744, 0, 9408, 31744, 0, 9408, 31744, 0, 9408, 31744, 0, 9408, 31744, 0, 9408, 31744, 0, 9424, 31744, 0, 9424, 31744, 0, 9424, 31744, 0, 9424, 31744, 0, 9424, 31744, 0, 9440, 31744, 0, 9440, 31744, 0, 9440, 31744, 0, 9440, 31744, 0, 9440, 31744, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581666786167332_822_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581666786167332_822_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ae787158 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581666786167332_822_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,316 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 9)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((207 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((221 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((228 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((237 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1792, 36, 0, 1792, 36, 0, 5376, 18432, 0, 5376, 18432, 0, 7872, 16384, 0, 8320, 2048, 0, 10576, 49153, 0, 10576, 49153, 0, 10576, 49153, 0, 13264, 2048, 0, 14164, 28, 0, 14164, 28, 0, 14164, 28, 0, 14168, 28, 0, 14168, 28, 0, 14168, 28, 0, 14612, 4, 0, 14616, 4, 
0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1792, 36, 0, 1792, 36, 0, 5376, 18432, 0, 5376, 18432, 0, 7872, 16384, 0, 8320, 2048, 0, 10576, 49153, 0, 10576, 49153, 0, 10576, 49153, 0, 13264, 2048, 0, 14164, 28, 0, 14164, 28, 0, 14164, 28, 0, 14168, 28, 0, 14168, 28, 0, 14168, 28, 0, 14612, 4, 0, 14616, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581668995080673_823_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581668995080673_823_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..731d4bda --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581668995080673_823_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,228 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i0 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 5))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 4480, 32768, 0, 4496, 32768, 0, 4512, 32768, 0, 6656, 1, 0, 11712, 1040, 0, 11712, 1040, 0, 12032, 18724, 0, 12032, 18724, 0, 12032, 18724, 0, 12032, 18724, 0, 12032, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 28086, 0, 1472, 
28086, 0, 1472, 28086, 0, 4480, 32768, 0, 4496, 32768, 0, 4512, 32768, 0, 6656, 1, 0, 11712, 1040, 0, 11712, 1040, 0, 12032, 18724, 0, 12032, 18724, 0, 12032, 18724, 0, 12032, 18724, 0, 12032, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581669667367185_824_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581669667367185_824_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fa8d3e22 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581669667367185_824_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,133 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 17, 0, 1792, 17, 0, 2688, 17476, 0, 2688, 17476, 0, 2688, 17476, 0, 2688, 17476, 0, 3136, 34952, 0, 3136, 34952, 0, 3136, 
34952, 0, 3136, 34952, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 17, 0, 1792, 17, 0, 2688, 17476, 0, 2688, 17476, 0, 2688, 17476, 0, 2688, 17476, 0, 3136, 34952, 0, 3136, 34952, 0, 3136, 34952, 0, 3136, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581676616666398_827_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581676616666398_827_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c680a7f2 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581676616666398_827_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,125 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + 
if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2560, 1040, 0, 2560, 1040, 0, 2880, 28086, 0, 2880, 28086, 0, 2880, 28086, 0, 2880, 28086, 0, 2880, 28086, 0, 2880, 28086, 0, 2880, 28086, 0, 2880, 28086, 0, 2880, 28086, 0, 2880, 28086, 0, 2560, 1040, 0, 2560, 1040, 0, 2880, 28086, 0, 2880, 28086, 0, 2880, 28086, 0, 2880, 28086, 0, 2880, 28086, 0, 2880, 28086, 0, 2880, 28086, 0, 2880, 28086, 0, 2880, 28086, 0, 2880, 28086, 0] + - Name: _wave_op_index + Format: UInt32 + 
Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581676749291828_828_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581676749291828_828_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1e336439 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581676749291828_828_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,450 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 7)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 3))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || 
(WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((247 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + 
break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((293 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((307 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((324 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((336 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((347 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((371 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((389 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((400 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((407 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (419 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (424 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (431 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + 
Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [8000, 1024, 0, 8640, 17, 0, 8640, 17, 0, 11008, 17476, 0, 11008, 17476, 0, 11008, 17476, 0, 11008, 17476, 0, 13248, 4352, 0, 13248, 4352, 0, 14528, 1, 0, 16704, 17, 0, 16704, 17, 0, 17600, 17476, 0, 17600, 17476, 0, 17600, 17476, 0, 17600, 17476, 0, 18768, 32768, 0, 18784, 32768, 0, 18800, 32768, 0, 22224, 32776, 0, 22224, 32776, 0, 22240, 32776, 0, 22240, 32776, 0, 22256, 32776, 0, 22256, 32776, 0, 27136, 17476, 0, 27136, 17476, 0, 27136, 17476, 0, 27136, 17476, 0, 27584, 34952, 0, 27584, 34952, 0, 27584, 34952, 0, 27584, 34952, 0, 8000, 1024, 0, 8640, 17, 0, 8640, 17, 0, 11008, 17476, 0, 11008, 17476, 0, 11008, 17476, 0, 11008, 17476, 0, 13248, 4352, 0, 13248, 4352, 0, 14528, 1, 0, 16704, 17, 0, 16704, 17, 0, 17600, 17476, 0, 17600, 17476, 0, 17600, 17476, 0, 17600, 17476, 0, 18768, 32768, 0, 18784, 32768, 0, 18800, 32768, 0, 22224, 32776, 0, 22224, 32776, 0, 22240, 32776, 0, 22240, 32776, 0, 22256, 32776, 0, 22256, 32776, 0, 27136, 17476, 0, 27136, 17476, 0, 27136, 17476, 0, 27136, 17476, 0, 27584, 34952, 0, 27584, 34952, 0, 27584, 34952, 0, 27584, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581684425144230_831_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581684425144230_831_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c9faa1e1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581684425144230_831_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,226 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if 
(((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((85 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 
6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter2 == 1)) { + break; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() >= 14)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } 
+ default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 8, 0, 2176, 2, 0, 2192, 2, 0, 2208, 2, 0, 3844, 130, 0, 3844, 130, 0, 3848, 130, 0, 3848, 130, 0, 3860, 130, 0, 3860, 130, 0, 3864, 130, 0, 3864, 130, 0, 3876, 130, 0, 3876, 130, 0, 3880, 130, 0, 3880, 130, 0, 5888, 8192, 0, 5904, 8192, 0, 5920, 8192, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 8, 0, 2176, 2, 0, 2192, 2, 0, 2208, 2, 0, 3844, 130, 0, 3844, 130, 0, 3848, 130, 0, 3848, 130, 0, 3860, 130, 0, 3860, 130, 0, 3864, 130, 0, 3864, 130, 0, 3876, 130, 0, 3876, 130, 0, 3880, 130, 0, 3880, 130, 0, 5888, 8192, 0, 5904, 8192, 0, 5920, 8192, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581685881493395_832_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581685881493395_832_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..03f9e220 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581685881493395_832_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,323 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 14)) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((218 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((245 << 6) | (counter3 << 4)) | (i4 << 2)) | i5); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((268 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((292 << 6) | (counter3 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((307 << 6) | (counter3 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 1)) { + break; + } + } + break; + } + case 2: { + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((328 << 6) | (counter3 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((349 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter3 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 240 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4416, 1024, 0, 7872, 16, 0, 9792, 18724, 0, 9792, 18724, 0, 
9792, 18724, 0, 9792, 18724, 0, 9792, 18724, 0, 11344, 61447, 0, 11344, 61447, 0, 11344, 61447, 0, 11344, 61447, 0, 11344, 61447, 0, 11344, 61447, 0, 11344, 61447, 0, 12496, 16, 0, 13968, 16, 0, 13972, 16, 0, 15696, 2064, 0, 15696, 2064, 0, 15697, 2064, 0, 15697, 2064, 0, 15700, 2064, 0, 15700, 2064, 0, 15701, 2064, 0, 15701, 2064, 0, 17168, 4129, 0, 17168, 4129, 0, 17168, 4129, 0, 18704, 32769, 0, 18704, 32769, 0, 18708, 32769, 0, 18708, 32769, 0, 21012, 16388, 0, 21012, 16388, 0, 21016, 16388, 0, 21016, 16388, 0, 22352, 4133, 0, 22352, 4133, 0, 22352, 4133, 0, 22352, 4133, 0, 4416, 1024, 0, 7872, 16, 0, 9792, 18724, 0, 9792, 18724, 0, 9792, 18724, 0, 9792, 18724, 0, 9792, 18724, 0, 11344, 61447, 0, 11344, 61447, 0, 11344, 61447, 0, 11344, 61447, 0, 11344, 61447, 0, 11344, 61447, 0, 11344, 61447, 0, 12496, 16, 0, 13968, 16, 0, 13972, 16, 0, 15696, 2064, 0, 15696, 2064, 0, 15697, 2064, 0, 15697, 2064, 0, 15700, 2064, 0, 15700, 2064, 0, 15701, 2064, 0, 15701, 2064, 0, 17168, 4129, 0, 17168, 4129, 0, 17168, 4129, 0, 18704, 32769, 0, 18704, 32769, 0, 18708, 32769, 0, 18708, 32769, 0, 21012, 16388, 0, 21012, 16388, 0, 21016, 16388, 0, 21016, 16388, 0, 22352, 4133, 0, 22352, 4133, 0, 22352, 4133, 0, 22352, 4133, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581691098449073_833_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581691098449073_833_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..566fb6bd --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581691098449073_833_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,204 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 7))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6016, 1040, 0, 6016, 1040, 0, 7616, 16384, 0, 8320, 16388, 0, 8320, 16388, 0, 9856, 2336, 0, 9856, 2336, 0, 9856, 2336, 0, 6016, 1040, 0, 6016, 1040, 0, 7616, 16384, 0, 8320, 16388, 0, 8320, 16388, 0, 9856, 2336, 0, 9856, 2336, 0, 9856, 2336, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581699172811692_835_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581699172811692_835_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fbb003e6 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581699172811692_835_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,336 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 9)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 0))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((192 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + result 
= (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((231 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3200, 34816, 0, 3200, 34816, 0, 3904, 32768, 0, 6208, 17, 0, 6208, 17, 0, 7104, 17476, 0, 7104, 17476, 0, 7104, 17476, 0, 7104, 17476, 0, 7552, 34952, 0, 7552, 34952, 0, 7552, 34952, 0, 7552, 34952, 0, 10240, 640, 0, 10240, 640, 0, 11328, 129, 0, 11328, 129, 0, 11344, 129, 0, 11344, 129, 0, 11360, 129, 0, 11360, 129, 0, 13696, 4096, 0, 14784, 32, 0, 14800, 32, 0, 14816, 32, 0, 15872, 16384, 0, 15888, 16384, 0, 15904, 16384, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3200, 34816, 0, 3200, 34816, 0, 3904, 32768, 0, 6208, 17, 0, 6208, 17, 0, 7104, 17476, 0, 7104, 17476, 0, 7104, 17476, 0, 7104, 17476, 0, 7552, 34952, 0, 7552, 34952, 0, 7552, 34952, 0, 7552, 34952, 0, 10240, 640, 0, 10240, 640, 0, 11328, 129, 0, 11328, 129, 0, 11344, 129, 0, 11344, 129, 0, 11360, 129, 0, 11360, 129, 0, 13696, 4096, 0, 14784, 32, 0, 14800, 32, 0, 14816, 32, 0, 15872, 16384, 0, 15888, 16384, 0, 15904, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581705863799681_837_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581705863799681_837_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..72d1e2e2 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581705863799681_837_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,425 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Generated divergence test. Every guarded site below runs one wave intrinsic and then appends a 3-word record to _participant_bit: InterlockedAdd on _wave_op_index[0] reserves 3 slots per executing lane, slot 0 holds (site id << 6) OR'ed with any enclosing loop counters (<< 4, << 2), and slots 1-2 hold ballot.x and ballot.y from WaveActiveBallot(1). */ + uint result = 0; + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + 
break; + } + } + } + } else { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((187 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 9)) { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((218 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 5))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((278 << 6) | (counter3 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((303 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (313 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (332 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (337 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if 
((WaveGetLaneIndex() >= 14)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (349 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (356 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (365 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (374 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (378 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (385 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter4 = 0; + while ((counter4 < 3)) { + 
counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((403 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((421 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((432 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 1, 0, 1312, 1, 0, 2704, 1, 0, 2708, 1, 0, 2720, 1, 0, 2724, 1, 0, 3584, 64, 0, 8064, 33288, 0, 8064, 33288, 0, 8064, 33288, 0, 9024, 1, 0, 9040, 1, 0, 10176, 1, 0, 10192, 1, 0, 10880, 1, 0, 10896, 1, 0, 11968, 4096, 0, 11984, 4096, 0, 25808, 256, 0, 25824, 256, 0, 25840, 256, 0, 26960, 16388, 0, 26960, 16388, 0, 26976, 16388, 0, 26976, 16388, 0, 26992, 16388, 0, 26992, 16388, 0, 27664, 16388, 0, 27664, 16388, 0, 27680, 16388, 0, 27680, 16388, 0, 27696, 16388, 0, 27696, 16388, 0, 
1296, 1, 0, 1312, 1, 0, 2704, 1, 0, 2708, 1, 0, 2720, 1, 0, 2724, 1, 0, 3584, 64, 0, 8064, 33288, 0, 8064, 33288, 0, 8064, 33288, 0, 9024, 1, 0, 9040, 1, 0, 10176, 1, 0, 10192, 1, 0, 10880, 1, 0, 10896, 1, 0, 11968, 4096, 0, 11984, 4096, 0, 25808, 256, 0, 25824, 256, 0, 25840, 256, 0, 26960, 16388, 0, 26960, 16388, 0, 26976, 16388, 0, 26976, 16388, 0, 26992, 16388, 0, 26992, 16388, 0, 27664, 16388, 0, 27664, 16388, 0, 27680, 16388, 0, 27680, 16388, 0, 27696, 16388, 0, 27696, 16388, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581715120967161_838_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581715120967161_838_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a95449aa --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581715120967161_838_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,68 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Generated divergence test. Lanes with an even lane index run WaveActiveMax and then each append a 3-word record to _participant_bit: InterlockedAdd on _wave_op_index[0] reserves 3 slots per executing lane, slot 0 holds (13 << 6), and slots 1-2 hold ballot.x and ballot.y from WaveActiveBallot(1). NOTE(review): the expected mask 21845 (0x5555) in pipeline.yaml only covers lanes below 16 while numthreads is 32, so the data presumes 16-lane waves; nothing in this shader fixes the wave size - confirm the harness does. */ + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581749243392247_841_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581749243392247_841_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d181c99c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581749243392247_841_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,495 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Generated divergence test. Every guarded site below runs one wave intrinsic and then appends a 3-word record to _participant_bit: InterlockedAdd on _wave_op_index[0] reserves 3 slots per executing lane, slot 0 holds (site id << 6) OR'ed with any enclosing loop counters (<< 4, << 2), and slots 1-2 hold ballot.x and ballot.y from WaveActiveBallot(1). */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((155 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((170 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 3)) { + 
result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 
1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((266 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((275 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((305 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() < 5) || 
(WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((323 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((334 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter6 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((348 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (353 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (362 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } /* NOTE(review): case 1 has no break, so its lanes continue into case 2; the expected records for ids 393, 411 and 425 in pipeline.yaml look like they depend on that - confirm it is intended. */ + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (372 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((393 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 3)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((411 << 6) | (i7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter8 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((425 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (430 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (434 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 426 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1536, 34, 0, 1536, 34, 0, 1552, 34, 0, 1552, 34, 0, 1568, 34, 0, 1568, 34, 0, 3524, 32, 0, 3528, 32, 0, 3540, 32, 0, 3544, 32, 0, 3556, 32, 0, 3560, 32, 0, 4740, 2, 0, 4744, 2, 0, 4756, 2, 0, 4760, 2, 0, 4772, 2, 0, 4776, 2, 0, 5184, 2, 0, 5200, 2, 0, 5216, 2, 0, 6016, 64, 0, 6592, 1024, 0, 6912, 16388, 0, 6912, 16388, 0, 9940, 8, 0, 9944, 8, 0, 9948, 8, 0, 9956, 8, 0, 9960, 8, 0, 9964, 8, 0, 10900, 8, 0, 10904, 8, 0, 10908, 8, 0, 10916, 8, 0, 10920, 8, 0, 10924, 8, 0, 14464, 2048, 0, 17040, 33288, 0, 17040, 33288, 0, 17040, 33288, 0, 18368, 33288, 0, 18368, 33288, 0, 18368, 33288, 0, 23168, 1040, 0, 23168, 1040, 0, 25152, 2, 0, 25168, 2, 0, 25184, 2, 0, 26308, 8194, 0, 26308, 8194, 0, 26312, 8194, 0, 26312, 8194, 0, 26324, 8194, 0, 26324, 8194, 0, 26328, 8194, 0, 26328, 8194, 0, 26340, 8194, 0, 26340, 8194, 0, 26344, 8194, 0, 26344, 8194, 0, 27200, 2, 0, 27216, 2, 0, 27232, 2, 0, 27520, 18724, 0, 27520, 18724, 0, 27520, 18724, 0, 27520, 18724, 0, 27520, 18724, 0, 576, 17, 0, 576, 17, 0, 1536, 34, 0, 1536, 34, 0, 1552, 34, 0, 1552, 34, 0, 1568, 34, 0, 1568, 34, 0, 3524, 32, 0, 3528, 32, 0, 3540, 32, 0, 3544, 32, 0, 3556, 32, 0, 3560, 32, 0, 4740, 2, 0, 4744, 2, 0, 4756, 2, 0, 4760, 2, 0, 4772, 2, 0, 4776, 2, 0, 5184, 2, 0, 5200, 2, 0, 5216, 2, 0, 6016, 64, 0, 6592, 1024, 0, 6912, 16388, 0, 6912, 16388, 0, 9940, 8, 0, 9944, 8, 0, 9948, 8, 0, 9956, 8, 0, 9960, 8, 0, 9964, 8, 0, 10900, 8, 0, 10904, 8, 0, 10908, 8, 0, 10916, 8, 0, 10920, 8, 0, 10924, 8, 0, 14464, 2048, 0, 17040, 33288, 0, 17040, 33288, 0, 17040, 33288, 0, 18368, 33288, 0, 18368, 33288, 0, 18368, 
33288, 0, 23168, 1040, 0, 23168, 1040, 0, 25152, 2, 0, 25168, 2, 0, 25184, 2, 0, 26308, 8194, 0, 26308, 8194, 0, 26312, 8194, 0, 26312, 8194, 0, 26324, 8194, 0, 26324, 8194, 0, 26328, 8194, 0, 26328, 8194, 0, 26340, 8194, 0, 26340, 8194, 0, 26344, 8194, 0, 26344, 8194, 0, 27200, 2, 0, 27216, 2, 0, 27232, 2, 0, 27520, 18724, 0, 27520, 18724, 0, 27520, 18724, 0, 27520, 18724, 0, 27520, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581836317734027_844_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581836317734027_844_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..db40d012 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581836317734027_844_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,127 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2176, 4356, 0, 2176, 4356, 0, 2176, 4356, 0, 2192, 4356, 0, 2192, 4356, 0, 2192, 4356, 0, 5824, 64, 0, 5440, 21781, 0, 5440, 21781, 0, 5440, 21781, 0, 5440, 21781, 0, 5440, 21781, 0, 5440, 21781, 0, 5440, 21781, 0, 2176, 4356, 0, 2176, 4356, 0, 2176, 4356, 0, 2192, 4356, 0, 2192, 4356, 0, 2192, 4356, 0, 5824, 64, 0, 5440, 21781, 0, 5440, 21781, 0, 5440, 21781, 0, 5440, 21781, 0, 5440, 21781, 0, 5440, 21781, 0, 5440, 21781, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581836550199531_845_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581836550199531_845_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..97302027 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581836550199531_845_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,130 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } 
+ case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 3))) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2432, 32769, 0, 2432, 32769, 0, 2448, 32769, 0, 2448, 32769, 0, 2464, 32769, 0, 2464, 32769, 0, 2880, 64, 0, 2896, 64, 0, 2912, 64, 0, 3456, 1040, 0, 3456, 1040, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2432, 32769, 0, 2432, 32769, 0, 2448, 32769, 0, 2448, 32769, 0, 2464, 32769, 0, 2464, 32769, 0, 2880, 64, 0, 2896, 64, 0, 2912, 64, 0, 3456, 1040, 0, 3456, 1040, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - 
Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581920820369259_847_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581920820369259_847_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..961bce0b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581920820369259_847_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,195 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 32769, 0, 1280, 32769, 0, 1920, 1, 0, 3264, 32776, 0, 3264, 32776, 0, 4224, 32769, 0, 4224, 32769, 0, 4672, 64, 0, 6080, 4608, 0, 6080, 4608, 0, 6656, 1040, 0, 6656, 1040, 0, 6976, 28086, 0, 6976, 28086, 0, 6976, 28086, 0, 6976, 28086, 0, 6976, 28086, 0, 6976, 28086, 0, 6976, 28086, 0, 6976, 28086, 0, 6976, 28086, 0, 6976, 28086, 0, 1280, 32769, 0, 1280, 32769, 0, 1920, 1, 0, 3264, 32776, 0, 3264, 32776, 0, 4224, 32769, 0, 4224, 32769, 0, 4672, 64, 0, 6080, 4608, 0, 6080, 4608, 0, 6656, 1040, 0, 6656, 1040, 
0, 6976, 28086, 0, 6976, 28086, 0, 6976, 28086, 0, 6976, 28086, 0, 6976, 28086, 0, 6976, 28086, 0, 6976, 28086, 0, 6976, 28086, 0, 6976, 28086, 0, 6976, 28086, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581925931408845_849_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581925931408845_849_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..781bd68a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581925931408845_849_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,417 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (i1 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((262 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((288 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((307 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((322 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || 
(WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((341 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (355 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (374 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((400 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (418 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (427 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (436 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (440 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (463 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1984, 1, 0, 2560, 16, 0, 2880, 256, 0, 14528, 16388, 0, 14528, 16388, 0, 15680, 32776, 0, 15680, 32776, 0, 22720, 32768, 0, 27328, 2176, 0, 27328, 2176, 
0, 27904, 2176, 0, 27904, 2176, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1984, 1, 0, 2560, 16, 0, 2880, 256, 0, 14528, 16388, 0, 14528, 16388, 0, 15680, 32776, 0, 15680, 32776, 0, 22720, 32768, 0, 27328, 2176, 0, 27328, 2176, 0, 27904, 2176, 0, 27904, 2176, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756581946225988446_851_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581946225988446_851_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dcd1ed3b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756581946225988446_851_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,118 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if 
((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((68 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2048, 32, 0, 2064, 32, 0, 2080, 32, 0, 3264, 520, 0, 3264, 520, 0, 3268, 520, 0, 3268, 520, 0, 
3280, 520, 0, 3280, 520, 0, 3284, 520, 0, 3284, 520, 0, 3296, 520, 0, 3296, 520, 0, 3300, 520, 0, 3300, 520, 0, 4352, 2, 0, 4356, 2, 0, 4368, 2, 0, 4372, 2, 0, 4384, 2, 0, 4388, 2, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2048, 32, 0, 2064, 32, 0, 2080, 32, 0, 3264, 520, 0, 3264, 520, 0, 3268, 520, 0, 3268, 520, 0, 3280, 520, 0, 3280, 520, 0, 3284, 520, 0, 3284, 520, 0, 3296, 520, 0, 3296, 520, 0, 3300, 520, 0, 3300, 520, 0, 4352, 2, 0, 4356, 2, 0, 4368, 2, 0, 4372, 2, 0, 4384, 2, 0, 4388, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize16BitTracking/tests/program_1756581953969286023_853_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581953969286023_853_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..6a7603d1
--- /dev/null
+++ b/test/WaveSize16BitTracking/tests/program_1756581953969286023_853_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,178 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Log of 3-uint records: (combinedId, ballot.x, ballot.y).
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the next free index in _participant_bit.
+
+[numthreads(32, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // Every lane that reaches a wave op appends one record for it.
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 2)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) {
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve 3 slots; temp receives the previous cursor value.
+      _participant_bit[temp] = (9 << 6); // combinedId: id in bits 6 and up, low 6 bits left for loop counters.
+      uint4 ballot = WaveActiveBallot(1); // Mask of the lanes active at this point.
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 1: {
+    if ((WaveGetLaneIndex() == 8)) { // Never true here: case 1 only holds odd lanes, so the else branch runs.
+      if ((WaveGetLaneIndex() == 6)) {
+        result = (result + WaveActiveMin(WaveGetLaneIndex()));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (19 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      uint counter0 = 0;
+      while ((counter0 < 2)) {
+        counter0 = (counter0 + 1);
+        if (((WaveGetLaneIndex() & 1) == 1)) {
+          result = (result + WaveActiveSum(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); // Bits 4-5 carry the loop counter.
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() & 1) == 1)) {
+          if (((WaveGetLaneIndex() & 1) == 0)) { // Contradicts the enclosing test, so this body is unreachable.
+            result = (result + WaveActiveMax(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((49 << 6) | (counter0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if (((WaveGetLaneIndex() & 1) == 1)) {
+          result = (result + WaveActiveMax(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((58 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((counter0 == 1)) { // counter0 is already 1 at the end of the first pass, so only one pass runs.
+          break;
+        }
+      }
+    } else {
+      if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) {
+        result = (result + WaveActiveSum((WaveGetLaneIndex() + 1)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (74 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      switch ((WaveGetLaneIndex() % 3)) {
+      case 0: {
+        if ((WaveGetLaneIndex() < 8)) {
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (84 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 1: {
+        if (((WaveGetLaneIndex() % 2) == 0)) { // Never true here: only odd lanes reach this switch.
+          result = (result + WaveActiveSum(2));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (93 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 2: {
+        if (true) {
+          result = (result + WaveActiveSum(3));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (98 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      }
+      if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) {
+        result = (result + WaveActiveMin(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (109 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 32 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 78 # Same length as expected_bit_patterns: 26 records x 3 values.
+  - Name: expected_bit_patterns # One 13-record sequence listed twice; presumably one copy per 16-lane wave (confirm).
+    Format: UInt32
+    Stride: 4
+    Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4736, 32778, 0, 4736, 32778, 0, 4736, 32778, 0, 5376, 8, 0, 6272, 2080, 0, 6272, 2080, 0, 6976, 32778, 0, 6976, 32778, 0, 6976, 32778, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4736, 32778, 0, 4736, 32778, 0, 4736, 32778, 0, 5376, 8, 0, 6272, 2080, 0, 6272, 2080, 0, 6976, 32778, 0, 6976, 32778, 0, 6976, 32778, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0] # The write cursor starts at index 0.
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3 # Each record is 3 uint32 values.
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+      - Name: _participant_bit
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 0
+          Space: 0
+        VulkanBinding:
+          Binding: 0
+      - Name: _wave_op_index
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 1
+          Space: 0
+        VulkanBinding:
+          Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize16BitTracking/tests/program_1756581954536123066_855_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581954536123066_855_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..468eec34
--- /dev/null
+++ b/test/WaveSize16BitTracking/tests/program_1756581954536123066_855_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,118 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Log of 3-uint records: (combinedId, ballot.x, ballot.y).
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the next free index in _participant_bit.
+
+[numthreads(32, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // Every lane that reaches a wave op appends one record for it.
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 3)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) {
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve 3 slots; temp receives the previous cursor value.
+      _participant_bit[temp] = (9 << 6); // combinedId: id in bits 6 and up, low 6 bits are zero here.
+      uint4 ballot = WaveActiveBallot(1); // Mask of the lanes active at this point.
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 1: {
+    if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) {
+      if ((WaveGetLaneIndex() == 3)) { // Lane 3 is not in the enclosing lane list, so this is never taken.
+        if ((WaveGetLaneIndex() == 7)) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (34 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9))) { // Lanes 2 and 9 are not in case 1 (2 % 3 == 2, 9 % 3 == 0).
+        result = (result + WaveActiveSum(WaveGetLaneIndex()));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (45 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  case 2: {
+    if (true) {
+      result = (result + WaveActiveSum(3));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (50 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  default: { // Unreachable: a value modulo 3 is always 0, 1 or 2.
+    result = (result + WaveActiveSum(99));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp);
+    _participant_bit[temp] = (54 << 6);
+    uint4 ballot = WaveActiveBallot(1);
+    _participant_bit[(temp + 1)] = ballot.x;
+    _participant_bit[(temp + 2)] = ballot.y;
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 32 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 48 # Same length as expected_bit_patterns: 16 records x 3 values.
+  - Name: expected_bit_patterns # One 8-record sequence listed twice; presumably one copy per 16-lane wave (confirm).
+    Format: UInt32
+    Stride: 4
+    Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 3200, 18724, 0, 3200, 18724, 0, 3200, 18724, 0, 3200, 18724, 0, 3200, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 3200, 18724, 0, 3200, 18724, 0, 3200, 18724, 0, 3200, 18724, 0, 3200, 18724, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0] # The write cursor starts at index 0.
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3 # Each record is 3 uint32 values.
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+      - Name: _participant_bit
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 0
+          Space: 0
+        VulkanBinding:
+          Binding: 0
+      - Name: _wave_op_index
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 1
+          Space: 0
+        VulkanBinding:
+          Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize16BitTracking/tests/program_1756581985184654489_857_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756581985184654489_857_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..09e76232
--- /dev/null
+++ b/test/WaveSize16BitTracking/tests/program_1756581985184654489_857_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,287 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Log of 3-uint records: (combinedId, ballot.x, ballot.y).
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the next free index in _participant_bit.
+
+[numthreads(32, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // Every lane that reaches a wave op appends one record for it.
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 3)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) {
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve 3 slots; temp receives the previous cursor value.
+      _participant_bit[temp] = (9 << 6); // combinedId: id in bits 6 and up, low 6 bits left for loop counters.
+      uint4 ballot = WaveActiveBallot(1); // Mask of the lanes active at this point.
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 1: {
+    for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) {
+      if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 5))) {
+        result = (result + WaveActiveMax(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((36 << 6) | (i0 << 4)); // Bits 4-5 carry the outer loop counter.
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if ((WaveGetLaneIndex() >= 8)) {
+        uint counter1 = 0;
+        while ((counter1 < 2)) {
+          counter1 = (counter1 + 1);
+          if (((WaveGetLaneIndex() & 1) == 0)) {
+            result = (result + WaveActiveMax(4));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((55 << 6) | (i0 << 4)) | (counter1 << 2)); // Bits 2-3 carry the inner loop counter.
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if (((WaveGetLaneIndex() & 1) == 1)) {
+            result = (result + WaveActiveMax(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((64 << 6) | (i0 << 4)) | (counter1 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+      } else {
+        if ((WaveGetLaneIndex() == 7)) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((71 << 6) | (i0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) {
+          if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) {
+            result = (result + WaveActiveMin(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((94 << 6) | (i0 << 4)) | (i2 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((WaveGetLaneIndex() == 10)) { // Never true here: this else branch only holds lanes below 8.
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((101 << 6) | (i0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 13))) {
+        result = (result + WaveActiveMin(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((120 << 6) | (i0 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  case 2: {
+    uint counter3 = 0;
+    while ((counter3 < 3)) {
+      counter3 = (counter3 + 1); // Incremented before use, so recorded values are 1..3.
+      if ((WaveGetLaneIndex() == 14)) {
+        result = (result + WaveActiveMax(WaveGetLaneIndex()));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((134 << 6) | (counter3 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) {
+        if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) {
+          result = (result + WaveActiveSum(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((153 << 6) | (counter3 << 4)) | (i4 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        switch ((WaveGetLaneIndex() % 4)) {
+        case 0: {
+          if ((WaveGetLaneIndex() < 8)) {
+            result = (result + WaveActiveSum(1));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((163 << 6) | (counter3 << 4)) | (i4 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 1: {
+          if (((WaveGetLaneIndex() % 2) == 0)) { // Never true here: lanes with index % 4 == 1 are odd.
+            result = (result + WaveActiveSum(2));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((172 << 6) | (counter3 << 4)) | (i4 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 2: {
+          if (true) {
+            result = (result + WaveActiveSum(3));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((177 << 6) | (counter3 << 4)) | (i4 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 3: {
+          if ((WaveGetLaneIndex() < 20)) {
+            result = (result + WaveActiveSum(4));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((184 << 6) | (counter3 << 4)) | (i4 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        }
+        if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) {
+          result = (result + WaveActiveMax((WaveGetLaneIndex() + 1)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((197 << 6) | (counter3 << 4)) | (i4 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if ((WaveGetLaneIndex() == 4)) { // Never true here: 4 % 3 == 1, so lane 4 is not in case 2.
+        result = (result + WaveActiveMin(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((204 << 6) | (counter3 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  }
+  if (((WaveGetLaneIndex() & 1) == 1)) {
+    if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) {
+      uint counter5 = 0;
+      while ((counter5 < 3)) {
+        counter5 = (counter5 + 1);
+        if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((242 << 6) | (counter5 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((253 << 6) | (counter5 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) {
+        result = (result + WaveActiveSum(WaveGetLaneIndex()));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (272 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    if (((WaveGetLaneIndex() & 1) == 0)) { // Contradicts the enclosing odd-lane test, so this body is unreachable.
+      result = (result + WaveActiveMax(7));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (281 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 32 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 510 # Same length as expected_bit_patterns: 170 records x 3 values.
+  - Name: expected_bit_patterns # One 85-record sequence listed twice; presumably one copy per 16-lane wave (confirm).
+    Format: UInt32
+    Stride: 4
+    Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 3524, 1024, 0, 3528, 1024, 0, 3540, 1024, 0, 3544, 1024, 0, 3556, 1024, 0, 3560, 1024, 0, 4100, 8192, 0, 4104, 8192, 0, 4116, 8192, 0, 4120, 8192, 0, 4132, 8192, 0, 4136, 8192, 0, 4544, 128, 0, 4560, 128, 0, 4576, 128, 0, 7680, 8192, 0, 7696, 8192, 0, 7712, 8192, 0, 8592, 16384, 0, 8608, 16384, 0, 8624, 16384, 0, 9808, 18436, 0, 9808, 18436, 0, 9808, 18436, 0, 9812, 18436, 0, 9812, 18436, 0, 9812, 18436, 0, 9824, 18436, 0, 9824, 18436, 0, 9824, 18436, 0, 9828, 18436, 0, 9828, 18436, 0, 9828, 18436, 0, 9840, 18436, 0, 9840, 18436, 0, 9840, 18436, 0, 9844, 18436, 0, 9844, 18436, 0, 9844, 18436, 0, 11344, 16388, 0, 11344, 16388, 0, 11348, 16388, 0, 11348, 16388, 0, 11360, 16388, 0, 11360, 16388, 0, 11364, 16388, 0, 11364, 16388, 0, 11376, 16388, 0, 11376, 16388, 0, 11380, 16388, 0, 11380, 16388, 0, 11792, 2048, 0, 11796, 2048, 0, 11808, 2048, 0, 11812, 2048, 0, 11824, 2048, 0, 11828, 2048, 0, 12624, 18436, 0, 12624, 18436, 0, 12624, 18436, 0, 12628, 18436, 0, 12628, 18436, 0, 12628, 18436, 0, 12640, 18436, 0, 12640, 18436, 0, 12640, 18436, 0, 12644, 18436, 0, 12644, 18436, 0, 12644, 18436, 0, 12656, 18436, 0, 12656, 18436, 0, 12656, 18436, 0, 12660, 18436, 0, 12660, 18436, 0, 12660, 18436, 0, 15504, 8192, 0, 15520, 8192, 0, 15536, 8192, 0, 16208, 8192, 0, 16224, 8192, 0, 16240, 8192, 0, 17408, 8192, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 3524, 1024, 0, 3528, 1024, 0, 3540, 1024, 0, 3544, 1024, 0, 3556, 1024, 0, 3560, 1024, 0, 4100, 8192, 0, 4104, 8192, 0, 4116, 8192, 0, 4120, 8192, 0, 4132, 8192, 0, 4136, 8192, 0, 4544, 128, 0, 4560, 128, 0, 4576, 128, 0, 7680, 8192, 0, 7696, 8192, 0, 7712, 8192, 0, 8592, 16384, 0, 8608, 16384, 0, 8624, 16384, 0, 9808, 18436, 0, 9808, 18436, 0, 9808, 18436, 0, 9812, 18436, 0, 9812, 18436, 0, 9812, 18436, 0, 9824, 18436, 0, 9824, 18436, 0, 9824, 18436, 0, 9828, 18436, 0, 9828, 18436, 0, 9828, 18436, 0, 9840, 18436, 0, 9840, 18436, 0, 9840, 18436, 0, 9844, 18436, 0, 9844, 18436, 0, 9844, 18436, 0, 11344, 16388, 0, 11344, 16388, 0, 11348, 16388, 0, 11348, 16388, 0, 11360, 16388, 0, 11360, 16388, 0, 11364, 16388, 0, 11364, 16388, 0, 11376, 16388, 0, 11376, 16388, 0, 11380, 16388, 0, 11380, 16388, 0, 11792, 2048, 0, 11796, 2048, 0, 11808, 2048, 0, 11812, 2048, 0, 11824, 2048, 0, 11828, 2048, 0, 12624, 18436, 0, 12624, 18436, 0, 12624, 18436, 0, 12628, 18436, 0, 12628, 18436, 0, 12628, 18436, 0, 12640, 18436, 0, 12640, 18436, 0, 12640, 18436, 0, 12644, 18436, 0, 12644, 18436, 0, 12644, 18436, 0, 12656, 18436, 0, 12656, 18436, 0, 12656, 18436, 0, 12660, 18436, 0, 12660, 18436, 0, 12660, 18436, 0, 15504, 8192, 0, 15520, 8192, 0, 15536, 8192, 0, 16208, 8192, 0, 16224, 8192, 0, 16240, 8192, 0, 17408, 8192, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0] # The write cursor starts at index 0.
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3 # Each record is 3 uint32 values.
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+      - Name: _participant_bit
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 0
+          Space: 0
+        VulkanBinding:
+          Binding: 0
+      - Name: _wave_op_index
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 1
+          Space: 0
+        VulkanBinding:
+          Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize16BitTracking/tests/program_1756582031336477994_858_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582031336477994_858_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..73894c62
--- /dev/null
+++ b/test/WaveSize16BitTracking/tests/program_1756582031336477994_858_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,212 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Log of 3-uint records: (combinedId, ballot.x, ballot.y).
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the next free index in _participant_bit.
+
+[numthreads(32, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // Every lane that reaches a wave op appends one record for it.
+  uint result = 0;
+  if ((WaveGetLaneIndex() < 1)) { // Only lane 0 enters this branch.
+    if ((WaveGetLaneIndex() >= 13)) { // Never true for lane 0.
+      result = (result + WaveActiveMin(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve 3 slots; temp receives the previous cursor value.
+      _participant_bit[temp] = (9 << 6); // combinedId: id in bits 6 and up, low 6 bits left for loop counters.
+      uint4 ballot = WaveActiveBallot(1); // Mask of the lanes active at this point.
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    uint counter0 = 0;
+    while ((counter0 < 3)) {
+      counter0 = (counter0 + 1); // Incremented before use, so recorded values are 1..3.
+      if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 13))) {
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); // Bits 4-5 carry the outer loop counter.
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if (((WaveGetLaneIndex() & 1) == 1)) { // False for lane 0, so the else branch runs.
+        if (((WaveGetLaneIndex() & 1) == 0)) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((41 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) {
+          if (((WaveGetLaneIndex() & 1) == 0)) {
+            result = (result + WaveActiveSum(8));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((58 << 6) | (counter0 << 4)) | (i1 << 2)); // Bits 2-3 carry the inner loop counter.
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if (((WaveGetLaneIndex() & 1) == 0)) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((67 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      } else {
+        if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) {
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((78 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        uint counter2 = 0;
+        while ((counter2 < 2)) {
+          counter2 = (counter2 + 1);
+          if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 3))) {
+            result = (result + WaveActiveMax(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((108 << 6) | (counter0 << 4)) | (counter2 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 10))) {
+            result = (result + WaveActiveMax(3));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((127 << 6) | (counter0 << 4)) | (counter2 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+      }
+      if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) {
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((138 << 6) | (counter0 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    if ((WaveGetLaneIndex() >= 8)) { // Never true for lane 0.
+      result = (result + WaveActiveMax(8));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (145 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+  for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) {
+    if (((WaveGetLaneIndex() & 1) == 0)) {
+      result = (result + WaveActiveMin(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = ((162 << 6) | (i3 << 4));
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    uint counter4 = 0;
+    while ((counter4 < 2)) {
+      counter4 = (counter4 + 1);
+      if (((WaveGetLaneIndex() & 1) == 1)) {
+        if (((WaveGetLaneIndex() & 1) == 0)) { // Contradicts the enclosing odd-lane test, so this body is unreachable.
+          result = (result + WaveActiveMin(8));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((183 << 6) | (i3 << 4)) | (counter4 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() & 1) == 0)) { // Unreachable for the same reason.
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((192 << 6) | (i3 << 4)) | (counter4 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+    }
+    if (((WaveGetLaneIndex() & 1) == 1)) {
+      result = (result + WaveActiveMin(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = ((201 << 6) | (i3 << 4));
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    if ((i3 == 1)) {
+      continue;
+    }
+    if ((i3 == 2)) {
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 32 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 342 # Same length as expected_bit_patterns: 114 records x 3 values.
+  - Name: expected_bit_patterns # One 57-record sequence listed twice; presumably one copy per 16-lane wave (confirm).
+    Format: UInt32
+    Stride: 4
+    Data: [1744, 1, 0, 1760, 1, 0, 1776, 1, 0, 5008, 1, 0, 5024, 1, 0, 5040, 1, 0, 8848, 1, 0, 8864, 1, 0, 8880, 1, 0, 10368, 21845, 0, 10368, 21845, 0, 10368, 21845, 0, 10368, 21845, 0, 10368, 21845, 0, 10368, 21845, 0, 10368, 21845, 0, 10368, 21845, 0, 10384, 21845, 0, 10384, 21845, 0, 10384, 21845, 0, 10384, 21845, 0, 10384, 21845, 0, 10384, 21845, 0, 10384, 21845, 0, 10384, 21845, 0, 10400, 21845, 0, 10400, 21845, 0, 10400, 21845, 0, 10400, 21845, 0, 10400, 21845, 0, 10400, 21845, 0, 10400, 21845, 0, 10400, 21845, 0, 12864, 43690, 0, 12864, 43690, 0, 12864, 43690, 0, 12864, 43690, 0, 12864, 43690, 0, 12864, 43690, 0, 12864, 43690, 0, 12864, 43690, 0, 12880, 43690, 0, 12880, 43690, 0, 12880, 43690, 0, 12880, 43690, 0, 12880, 43690, 0, 12880, 43690, 0, 12880, 43690, 0, 12880, 43690, 0, 12896, 43690, 0, 12896, 43690, 0, 12896, 43690, 0, 12896, 43690, 0, 12896, 43690, 0, 12896, 43690, 0, 12896, 43690, 0, 12896, 43690, 0, 1744, 1, 0, 1760, 1, 0, 1776, 1, 0, 5008, 1, 0, 5024, 1, 0, 5040, 1, 0, 8848, 1, 0, 8864, 1, 0, 8880, 1, 0, 10368, 21845, 0, 10368, 21845, 0, 10368, 21845, 0, 10368, 21845, 0, 10368, 21845, 0, 10368, 21845, 0, 10368, 21845, 0, 10368, 21845, 0, 10384, 21845, 0, 10384, 21845, 0, 10384, 21845, 0, 10384, 21845, 0, 10384, 21845, 0, 10384, 21845, 0, 10384, 21845, 0, 10384, 21845, 0, 10400, 21845, 0, 10400, 21845, 0, 10400, 21845, 0, 10400, 21845, 0, 10400, 21845, 0, 10400, 21845, 0, 10400, 21845, 0, 10400, 21845, 0, 12864, 43690, 0, 12864, 43690, 0, 12864, 43690, 0, 12864, 43690, 0, 12864, 43690, 0, 12864, 43690, 0, 12864, 43690, 0, 12864, 43690, 0, 12880, 43690, 0, 12880, 43690, 0, 12880, 43690, 0, 12880, 43690, 0, 12880, 43690, 0, 12880, 43690, 0, 12880, 43690, 0, 12880, 43690, 0, 12896, 43690, 0, 12896, 43690, 0, 12896, 43690, 0, 12896, 43690, 0, 12896, 43690, 0, 12896, 43690, 0, 12896, 43690, 0, 12896, 43690, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0] # The write cursor starts at index 0.
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3 # Each record is 3 uint32 values.
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+      - Name: _participant_bit
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 0
+          Space: 0
+        VulkanBinding:
+          Binding: 0
+      - Name: _wave_op_index
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 1
+          Space: 0
+        VulkanBinding:
+          Binding: 1
+...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582036237797045_859_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582036237797045_859_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..08d98280 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582036237797045_859_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,155 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((112 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 63, 0, 576, 63, 0, 576, 63, 0, 576, 63, 0, 576, 63, 0, 576, 63, 0, 1744, 1, 0, 1760, 1, 0, 2384, 1, 0, 2400, 1, 0, 4736, 8584, 0, 4736, 8584, 0, 4736, 8584, 0, 4736, 8584, 0, 4480, 21589, 0, 4480, 21589, 0, 4480, 21589, 0, 4480, 21589, 0, 4480, 21589, 0, 4480, 21589, 0, 4480, 21589, 0, 5696, 512, 0, 5712, 512, 0, 7172, 1088, 0, 7172, 1088, 0, 7176, 1088, 0, 7176, 1088, 0, 7180, 1088, 0, 7180, 1088, 0, 7188, 1088, 0, 7188, 1088, 0, 7192, 1088, 0, 7192, 1088, 0, 7196, 1088, 0, 7196, 1088, 0, 576, 63, 0, 576, 63, 0, 576, 63, 0, 576, 63, 0, 576, 63, 0, 576, 63, 0, 1744, 1, 0, 1760, 1, 0, 2384, 1, 0, 2400, 1, 0, 4736, 8584, 0, 4736, 8584, 0, 4736, 8584, 0, 4736, 8584, 0, 4480, 21589, 0, 4480, 21589, 0, 4480, 21589, 0, 4480, 21589, 0, 4480, 21589, 0, 4480, 21589, 0, 4480, 21589, 0, 5696, 512, 0, 5712, 512, 0, 7172, 1088, 0, 7172, 1088, 0, 7176, 1088, 0, 7176, 1088, 0, 7180, 1088, 0, 7180, 1088, 0, 7188, 1088, 0, 7188, 1088, 0, 7192, 1088, 0, 7192, 1088, 0, 7196, 1088, 0, 7196, 1088, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 
1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582037688564760_860_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582037688564760_860_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9eaf3367 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582037688564760_860_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,172 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 5)) { + if 
(((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 32775, 0, 1216, 32775, 0, 1216, 32775, 0, 1216, 32775, 0, 2560, 32775, 0, 2560, 32775, 0, 2560, 32775, 0, 2560, 32775, 0, 1216, 32775, 0, 1216, 32775, 0, 1216, 32775, 0, 1216, 32775, 0, 2560, 32775, 0, 2560, 32775, 0, 2560, 32775, 0, 2560, 32775, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582037836539629_861_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582037836539629_861_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..547808be --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582037836539629_861_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,143 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 2512, 10240, 0, 2512, 10240, 0, 2528, 10240, 0, 2528, 10240, 0, 4672, 16, 0, 4688, 16, 0, 4704, 16, 0, 4992, 16644, 0, 4992, 16644, 0, 4992, 16644, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 2512, 10240, 0, 2512, 10240, 0, 2528, 10240, 0, 2528, 10240, 0, 4672, 16, 0, 4688, 16, 0, 4704, 16, 0, 4992, 16644, 0, 4992, 
16644, 0, 4992, 16644, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582038441799871_862_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582038441799871_862_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ba6151d4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582038441799871_862_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,112 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 63489, 0, 1216, 63489, 0, 1216, 63489, 0, 1216, 63489, 0, 1216, 63489, 0, 1216, 63489, 0, 1856, 1, 0, 3024, 45057, 0, 3024, 45057, 0, 3024, 45057, 0, 3024, 45057, 0, 3040, 45057, 0, 3040, 45057, 0, 3040, 45057, 0, 3040, 45057, 0, 3520, 18432, 0, 3520, 18432, 0, 1216, 63489, 0, 1216, 63489, 0, 1216, 63489, 0, 1216, 63489, 0, 1216, 63489, 0, 1216, 63489, 0, 1856, 1, 0, 3024, 45057, 0, 3024, 45057, 0, 3024, 45057, 0, 3024, 45057, 0, 3040, 45057, 0, 3040, 45057, 0, 3040, 45057, 0, 3040, 45057, 0, 3520, 18432, 0, 3520, 18432, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582039183842659_864_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582039183842659_864_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f12009a7 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582039183842659_864_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,144 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 15)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] 
+Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582039283299570_865_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582039283299570_865_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f024af15 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582039283299570_865_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,456 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 8)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { 
+ if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((115 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if 
(((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((217 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((236 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + 
result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((243 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 9)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + 
if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((315 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((332 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((343 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 2)) { + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (356 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (365 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (370 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (377 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (381 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (388 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 17, 0, 1088, 17, 0, 1104, 17, 0, 1104, 17, 0, 9808, 2, 0, 9824, 2, 0, 10304, 26214, 0, 10304, 26214, 0, 10304, 26214, 0, 10304, 26214, 0, 10304, 26214, 0, 10304, 26214, 0, 10304, 26214, 0, 10304, 26214, 0, 13904, 4, 0, 15120, 16385, 0, 15120, 16385, 0, 15124, 16385, 0, 15124, 16385, 0, 15568, 1024, 0, 1088, 17, 0, 1088, 17, 0, 1104, 17, 0, 1104, 17, 0, 9808, 2, 0, 9824, 2, 0, 10304, 26214, 0, 10304, 26214, 0, 10304, 26214, 0, 10304, 26214, 0, 10304, 26214, 0, 10304, 26214, 0, 10304, 26214, 0, 10304, 
26214, 0, 13904, 4, 0, 15120, 16385, 0, 15120, 16385, 0, 15124, 16385, 0, 15124, 16385, 0, 15568, 1024, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582046982993849_867_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582046982993849_867_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ad1333b8 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582046982993849_867_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,389 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 15)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((236 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((i1 == 1)) { + continue; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (295 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((314 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint 
counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((328 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (345 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (360 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 5))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (390 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (409 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1984, 10369, 0, 1984, 10369, 0, 1984, 10369, 0, 1984, 10369, 0, 1600, 64, 0, 4160, 8192, 0, 8512, 55318, 0, 8512, 55318, 0, 8512, 55318, 0, 8512, 55318, 0, 8512, 55318, 0, 8512, 55318, 0, 8512, 55318, 0, 9344, 64, 0, 14144, 2, 0, 14160, 2, 0, 14176, 2, 0, 15808, 18692, 0, 15808, 18692, 0, 15808, 18692, 0, 15808, 18692, 0, 16704, 17, 0, 16704, 17, 0, 17600, 17476, 0, 17600, 17476, 0, 17600, 17476, 0, 17600, 17476, 0, 18880, 32768, 0, 20096, 32768, 0, 20112, 32768, 0, 20128, 32768, 0, 22080, 32768, 0, 1984, 10369, 0, 1984, 10369, 0, 1984, 10369, 0, 1984, 10369, 0, 1600, 64, 0, 4160, 8192, 0, 8512, 55318, 0, 8512, 55318, 0, 8512, 55318, 0, 8512, 55318, 0, 8512, 55318, 0, 8512, 55318, 0, 8512, 55318, 0, 9344, 64, 0, 14144, 2, 0, 14160, 2, 0, 14176, 2, 0, 15808, 18692, 0, 15808, 18692, 0, 15808, 18692, 0, 15808, 18692, 0, 16704, 17, 0, 16704, 17, 0, 17600, 17476, 0, 17600, 17476, 0, 17600, 17476, 0, 17600, 17476, 0, 18880, 32768, 0, 20096, 32768, 0, 20112, 32768, 0, 20128, 32768, 0, 22080, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582051346859543_868_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582051346859543_868_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dce49d09 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582051346859543_868_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,130 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + 
result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1728, 43010, 0, 1728, 43010, 0, 1728, 43010, 0, 1728, 43010, 0, 2432, 40962, 0, 2432, 40962, 0, 2432, 40962, 0, 3072, 85, 0, 3072, 85, 0, 3072, 85, 0, 3072, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1728, 43010, 0, 1728, 43010, 0, 1728, 43010, 0, 1728, 43010, 0, 2432, 40962, 0, 2432, 40962, 0, 2432, 40962, 0, 3072, 85, 0, 3072, 85, 0, 3072, 85, 0, 3072, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 
0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582082171900589_870_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582082171900589_870_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9a5cd773 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582082171900589_870_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,233 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || 
(WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 4))) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 3: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((203 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((213 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((222 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((231 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 570 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 4352, 0, 1040, 4352, 0, 1680, 1, 0, 2256, 4368, 0, 2256, 4368, 0, 2256, 4368, 0, 2704, 17, 0, 2704, 17, 0, 5952, 4, 0, 6400, 24576, 0, 6400, 24576, 0, 12032, 10312, 0, 12032, 10312, 0, 12032, 10312, 0, 12032, 10312, 0, 12048, 10312, 0, 12048, 10312, 0, 12048, 10312, 0, 12048, 10312, 0, 12064, 10312, 0, 12064, 10312, 0, 12064, 10312, 0, 12064, 10312, 0, 12992, 64, 0, 12996, 64, 0, 13008, 64, 0, 13012, 64, 0, 13024, 64, 0, 13028, 64, 0, 14208, 43658, 0, 14208, 43658, 0, 14208, 43658, 0, 14208, 43658, 0, 14208, 43658, 0, 14208, 43658, 0, 14208, 43658, 0, 14212, 43658, 0, 14212, 43658, 0, 14212, 43658, 0, 14212, 43658, 0, 14212, 43658, 0, 14212, 43658, 0, 14212, 43658, 0, 14224, 43658, 0, 
14224, 43658, 0, 14224, 43658, 0, 14224, 43658, 0, 14224, 43658, 0, 14224, 43658, 0, 14224, 43658, 0, 14228, 43658, 0, 14228, 43658, 0, 14228, 43658, 0, 14228, 43658, 0, 14228, 43658, 0, 14228, 43658, 0, 14228, 43658, 0, 14240, 43658, 0, 14240, 43658, 0, 14240, 43658, 0, 14240, 43658, 0, 14240, 43658, 0, 14240, 43658, 0, 14240, 43658, 0, 14244, 43658, 0, 14244, 43658, 0, 14244, 43658, 0, 14244, 43658, 0, 14244, 43658, 0, 14244, 43658, 0, 14244, 43658, 0, 14784, 17476, 0, 14784, 17476, 0, 14784, 17476, 0, 14784, 17476, 0, 14788, 17476, 0, 14788, 17476, 0, 14788, 17476, 0, 14788, 17476, 0, 14800, 17476, 0, 14800, 17476, 0, 14800, 17476, 0, 14800, 17476, 0, 14804, 17476, 0, 14804, 17476, 0, 14804, 17476, 0, 14804, 17476, 0, 14816, 17476, 0, 14816, 17476, 0, 14816, 17476, 0, 14816, 17476, 0, 14820, 17476, 0, 14820, 17476, 0, 14820, 17476, 0, 14820, 17476, 0, 1040, 4352, 0, 1040, 4352, 0, 1680, 1, 0, 2256, 4368, 0, 2256, 4368, 0, 2256, 4368, 0, 2704, 17, 0, 2704, 17, 0, 5952, 4, 0, 6400, 24576, 0, 6400, 24576, 0, 12032, 10312, 0, 12032, 10312, 0, 12032, 10312, 0, 12032, 10312, 0, 12048, 10312, 0, 12048, 10312, 0, 12048, 10312, 0, 12048, 10312, 0, 12064, 10312, 0, 12064, 10312, 0, 12064, 10312, 0, 12064, 10312, 0, 12992, 64, 0, 12996, 64, 0, 13008, 64, 0, 13012, 64, 0, 13024, 64, 0, 13028, 64, 0, 14208, 43658, 0, 14208, 43658, 0, 14208, 43658, 0, 14208, 43658, 0, 14208, 43658, 0, 14208, 43658, 0, 14208, 43658, 0, 14212, 43658, 0, 14212, 43658, 0, 14212, 43658, 0, 14212, 43658, 0, 14212, 43658, 0, 14212, 43658, 0, 14212, 43658, 0, 14224, 43658, 0, 14224, 43658, 0, 14224, 43658, 0, 14224, 43658, 0, 14224, 43658, 0, 14224, 43658, 0, 14224, 43658, 0, 14228, 43658, 0, 14228, 43658, 0, 14228, 43658, 0, 14228, 43658, 0, 14228, 43658, 0, 14228, 43658, 0, 14228, 43658, 0, 14240, 43658, 0, 14240, 43658, 0, 14240, 43658, 0, 14240, 43658, 0, 14240, 43658, 0, 14240, 43658, 0, 14240, 43658, 0, 14244, 43658, 0, 14244, 43658, 0, 14244, 43658, 0, 14244, 43658, 0, 14244, 43658, 0, 14244, 
43658, 0, 14244, 43658, 0, 14784, 17476, 0, 14784, 17476, 0, 14784, 17476, 0, 14784, 17476, 0, 14788, 17476, 0, 14788, 17476, 0, 14788, 17476, 0, 14788, 17476, 0, 14800, 17476, 0, 14800, 17476, 0, 14800, 17476, 0, 14800, 17476, 0, 14804, 17476, 0, 14804, 17476, 0, 14804, 17476, 0, 14804, 17476, 0, 14816, 17476, 0, 14816, 17476, 0, 14816, 17476, 0, 14816, 17476, 0, 14820, 17476, 0, 14820, 17476, 0, 14820, 17476, 0, 14820, 17476, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582102571332425_873_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582102571332425_873_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..56f3e731 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582102571332425_873_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,112 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1344, 1040, 0, 1344, 1040, 0, 1664, 16644, 0, 1664, 16644, 0, 1664, 16644, 0, 768, 65, 0, 768, 65, 0, 1344, 1040, 0, 1344, 1040, 0, 1664, 16644, 0, 1664, 16644, 0, 1664, 16644, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + 
Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582103055267455_875_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582103055267455_875_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a2be4275 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582103055267455_875_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,320 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || 
(WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || 
(WaveGetLaneIndex() == 11))) { + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(4)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((266 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((288 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((338 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((355 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((366 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (370 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 588 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 32769, 0, 1296, 32769, 0, 1312, 32769, 0, 1312, 32769, 0, 6352, 8, 0, 6368, 8, 0, 6912, 1040, 0, 6912, 1040, 0, 7232, 28086, 0, 7232, 28086, 0, 7232, 28086, 0, 7232, 28086, 0, 7232, 28086, 0, 7232, 28086, 0, 7232, 28086, 0, 7232, 28086, 0, 7232, 28086, 0, 7232, 28086, 0, 11136, 1040, 0, 11136, 1040, 0, 12288, 16384, 0, 13200, 4, 0, 13216, 4, 0, 13232, 4, 0, 15808, 1024, 0, 17024, 1, 0, 17040, 1, 0, 18436, 4096, 0, 18440, 4096, 0, 18444, 4096, 0, 18452, 4096, 0, 18456, 4096, 0, 18460, 4096, 0, 20096, 1, 0, 21632, 43690, 0, 21632, 43690, 0, 21632, 43690, 0, 21632, 43690, 0, 21632, 43690, 0, 21632, 43690, 0, 21632, 43690, 0, 21632, 43690, 0, 21648, 43690, 0, 21648, 43690, 0, 21648, 43690, 0, 21648, 43690, 0, 21648, 43690, 0, 21648, 43690, 0, 21648, 43690, 0, 21648, 43690, 0, 22720, 43690, 0, 22720, 43690, 0, 22720, 43690, 0, 22720, 43690, 0, 22720, 43690, 0, 22720, 43690, 0, 22720, 43690, 0, 22720, 43690, 0, 22724, 43690, 0, 22724, 43690, 0, 22724, 43690, 0, 22724, 43690, 0, 22724, 43690, 0, 22724, 43690, 0, 22724, 43690, 0, 22724, 43690, 0, 22728, 43690, 0, 22728, 43690, 0, 22728, 43690, 0, 22728, 43690, 0, 22728, 43690, 0, 22728, 43690, 0, 22728, 43690, 0, 22728, 43690, 0, 22736, 43690, 0, 22736, 43690, 0, 22736, 43690, 0, 22736, 43690, 0, 22736, 43690, 0, 22736, 43690, 0, 22736, 43690, 0, 22736, 43690, 0, 22740, 43690, 0, 22740, 43690, 0, 22740, 43690, 0, 22740, 43690, 0, 22740, 43690, 0, 22740, 43690, 0, 22740, 43690, 0, 22740, 43690, 0, 22744, 43690, 0, 22744, 43690, 0, 22744, 43690, 0, 22744, 43690, 0, 22744, 43690, 0, 22744, 43690, 0, 22744, 43690, 0, 22744, 43690, 0, 1296, 32769, 0, 1296, 32769, 0, 1312, 32769, 
0, 1312, 32769, 0, 6352, 8, 0, 6368, 8, 0, 6912, 1040, 0, 6912, 1040, 0, 7232, 28086, 0, 7232, 28086, 0, 7232, 28086, 0, 7232, 28086, 0, 7232, 28086, 0, 7232, 28086, 0, 7232, 28086, 0, 7232, 28086, 0, 7232, 28086, 0, 7232, 28086, 0, 11136, 1040, 0, 11136, 1040, 0, 12288, 16384, 0, 13200, 4, 0, 13216, 4, 0, 13232, 4, 0, 15808, 1024, 0, 17024, 1, 0, 17040, 1, 0, 18436, 4096, 0, 18440, 4096, 0, 18444, 4096, 0, 18452, 4096, 0, 18456, 4096, 0, 18460, 4096, 0, 20096, 1, 0, 21632, 43690, 0, 21632, 43690, 0, 21632, 43690, 0, 21632, 43690, 0, 21632, 43690, 0, 21632, 43690, 0, 21632, 43690, 0, 21632, 43690, 0, 21648, 43690, 0, 21648, 43690, 0, 21648, 43690, 0, 21648, 43690, 0, 21648, 43690, 0, 21648, 43690, 0, 21648, 43690, 0, 21648, 43690, 0, 22720, 43690, 0, 22720, 43690, 0, 22720, 43690, 0, 22720, 43690, 0, 22720, 43690, 0, 22720, 43690, 0, 22720, 43690, 0, 22720, 43690, 0, 22724, 43690, 0, 22724, 43690, 0, 22724, 43690, 0, 22724, 43690, 0, 22724, 43690, 0, 22724, 43690, 0, 22724, 43690, 0, 22724, 43690, 0, 22728, 43690, 0, 22728, 43690, 0, 22728, 43690, 0, 22728, 43690, 0, 22728, 43690, 0, 22728, 43690, 0, 22728, 43690, 0, 22728, 43690, 0, 22736, 43690, 0, 22736, 43690, 0, 22736, 43690, 0, 22736, 43690, 0, 22736, 43690, 0, 22736, 43690, 0, 22736, 43690, 0, 22736, 43690, 0, 22740, 43690, 0, 22740, 43690, 0, 22740, 43690, 0, 22740, 43690, 0, 22740, 43690, 0, 22740, 43690, 0, 22740, 43690, 0, 22740, 43690, 0, 22744, 43690, 0, 22744, 43690, 0, 22744, 43690, 0, 22744, 43690, 0, 22744, 43690, 0, 22744, 43690, 0, 22744, 43690, 0, 22744, 43690, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582107759586236_876_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582107759586236_876_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1b2bf391 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582107759586236_876_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,253 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { 
+ result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((91 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((102 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14))) { + 
result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 372 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 4608, 32, 0, 4624, 32, 0, 5824, 18436, 0, 5824, 18436, 0, 5824, 18436, 0, 5828, 18436, 0, 5828, 18436, 0, 5828, 18436, 0, 5840, 18436, 0, 5840, 18436, 0, 5840, 18436, 0, 5844, 18436, 0, 5844, 18436, 0, 5844, 18436, 0, 6528, 18432, 0, 6528, 18432, 0, 6532, 18432, 0, 6532, 18432, 0, 6544, 18432, 0, 6544, 18432, 0, 6548, 18432, 0, 6548, 18432, 0, 7424, 16384, 0, 7440, 16384, 0, 9728, 1, 0, 10432, 1, 0, 11008, 5201, 0, 11008, 5201, 0, 11008, 5201, 0, 11008, 5201, 0, 11008, 5201, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 
0, 12224, 85, 0, 12224, 85, 0, 12224, 85, 0, 12224, 85, 0, 12800, 21845, 0, 12800, 21845, 0, 12800, 21845, 0, 12800, 21845, 0, 12800, 21845, 0, 12800, 21845, 0, 12800, 21845, 0, 12800, 21845, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 4608, 32, 0, 4624, 32, 0, 5824, 18436, 0, 5824, 18436, 0, 5824, 18436, 0, 5828, 18436, 0, 5828, 18436, 0, 5828, 18436, 0, 5840, 18436, 0, 5840, 18436, 0, 5840, 18436, 0, 5844, 18436, 0, 5844, 18436, 0, 5844, 18436, 0, 6528, 18432, 0, 6528, 18432, 0, 6532, 18432, 0, 6532, 18432, 0, 6544, 18432, 0, 6544, 18432, 0, 6548, 18432, 0, 6548, 18432, 0, 7424, 16384, 0, 7440, 16384, 0, 9728, 1, 0, 10432, 1, 0, 11008, 5201, 0, 11008, 5201, 0, 11008, 5201, 0, 11008, 5201, 0, 11008, 5201, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 11328, 65535, 0, 12224, 85, 0, 12224, 85, 0, 12224, 85, 0, 12224, 85, 0, 12800, 21845, 0, 12800, 21845, 0, 12800, 21845, 0, 12800, 21845, 0, 12800, 21845, 0, 12800, 21845, 0, 12800, 21845, 0, 12800, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582131706407934_878_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582131706407934_878_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8eea5fa1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582131706407934_878_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,149 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) 
|| (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 
0, 1152, 1040, 0, 1152, 1040, 0, 4352, 16384, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 4352, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582131839548545_879_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582131839548545_879_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..15d9f9f8 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582131839548545_879_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,141 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | 
(counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5136, 32800, 0, 5136, 32800, 0, 5152, 32800, 0, 5152, 32800, 0, 5168, 32800, 0, 5168, 32800, 0, 6544, 2, 0, 6560, 2, 0, 6576, 2, 0, 7504, 516, 0, 7504, 516, 0, 7520, 516, 0, 7520, 516, 0, 7536, 516, 0, 7536, 516, 0, 8464, 132, 0, 8464, 132, 0, 8480, 132, 0, 8480, 132, 0, 8496, 132, 0, 8496, 132, 0, 9936, 18498, 0, 9936, 18498, 0, 9936, 18498, 0, 9936, 18498, 0, 9952, 18498, 0, 9952, 18498, 0, 9952, 18498, 0, 9952, 18498, 0, 9968, 18498, 0, 9968, 18498, 0, 9968, 18498, 0, 9968, 18498, 0, 5136, 32800, 0, 5136, 32800, 0, 5152, 32800, 0, 5152, 32800, 0, 5168, 32800, 0, 5168, 32800, 0, 6544, 2, 0, 6560, 2, 0, 6576, 2, 0, 7504, 516, 0, 7504, 516, 0, 7520, 516, 0, 7520, 516, 0, 7536, 516, 0, 7536, 516, 0, 8464, 132, 0, 8464, 132, 0, 8480, 
132, 0, 8480, 132, 0, 8496, 132, 0, 8496, 132, 0, 9936, 18498, 0, 9936, 18498, 0, 9936, 18498, 0, 9936, 18498, 0, 9952, 18498, 0, 9952, 18498, 0, 9952, 18498, 0, 9952, 18498, 0, 9968, 18498, 0, 9968, 18498, 0, 9968, 18498, 0, 9968, 18498, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582133152183434_880_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582133152183434_880_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5b2054e0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582133152183434_880_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,247 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 11)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + if 
(((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 15)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 
<< 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((193 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((216 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((239 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((258 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 348 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1920, 32768, 0, 3072, 8192, 0, 5248, 8192, 0, 5568, 18724, 0, 5568, 18724, 0, 5568, 18724, 0, 5568, 18724, 0, 5568, 18724, 0, 10688, 32768, 0, 10704, 32768, 0, 13844, 33560, 0, 13844, 33560, 0, 13844, 33560, 0, 13844, 33560, 0, 13844, 33560, 0, 13848, 33560, 0, 13848, 33560, 0, 13848, 33560, 0, 13848, 33560, 0, 13848, 33560, 0, 13860, 33560, 0, 13860, 33560, 0, 13860, 33560, 0, 13860, 33560, 0, 13860, 33560, 0, 13864, 33560, 0, 13864, 33560, 0, 13864, 33560, 0, 13864, 33560, 0, 13864, 33560, 0, 15316, 321, 0, 15316, 321, 0, 15316, 321, 0, 15320, 321, 0, 15320, 321, 0, 15320, 321, 0, 15332, 321, 0, 15332, 321, 0, 15332, 321, 0, 15336, 321, 0, 15336, 321, 0, 15336, 321, 0, 16532, 18465, 0, 16532, 18465, 0, 16532, 18465, 0, 16532, 18465, 0, 16536, 18465, 0, 16536, 18465, 0, 16536, 18465, 0, 16536, 18465, 0, 16548, 18465, 0, 16548, 18465, 0, 16548, 18465, 0, 16548, 18465, 0, 16552, 18465, 0, 16552, 18465, 0, 16552, 18465, 0, 16552, 18465, 0, 1920, 32768, 0, 3072, 8192, 0, 5248, 8192, 0, 5568, 18724, 0, 5568, 18724, 0, 5568, 18724, 0, 5568, 18724, 0, 5568, 18724, 0, 10688, 32768, 0, 10704, 32768, 0, 13844, 33560, 0, 13844, 33560, 0, 13844, 33560, 0, 13844, 33560, 0, 13844, 33560, 0, 13848, 33560, 0, 13848, 33560, 0, 13848, 33560, 0, 13848, 33560, 0, 13848, 33560, 0, 13860, 33560, 0, 13860, 33560, 0, 13860, 33560, 0, 13860, 33560, 0, 13860, 33560, 0, 13864, 33560, 0, 13864, 33560, 0, 13864, 33560, 0, 13864, 33560, 0, 13864, 33560, 0, 15316, 321, 0, 15316, 321, 0, 15316, 321, 0, 15320, 321, 0, 15320, 321, 0, 15320, 321, 0, 
15332, 321, 0, 15332, 321, 0, 15332, 321, 0, 15336, 321, 0, 15336, 321, 0, 15336, 321, 0, 16532, 18465, 0, 16532, 18465, 0, 16532, 18465, 0, 16532, 18465, 0, 16536, 18465, 0, 16536, 18465, 0, 16536, 18465, 0, 16536, 18465, 0, 16548, 18465, 0, 16548, 18465, 0, 16548, 18465, 0, 16548, 18465, 0, 16552, 18465, 0, 16552, 18465, 0, 16552, 18465, 0, 16552, 18465, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582151418513564_881_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582151418513564_881_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..70bdac19 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582151418513564_881_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,361 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 2))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((171 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 2) 
|| (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) 
|| (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((295 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((304 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((311 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter3 == 2)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (321 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (325 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1488, 18, 0, 1488, 18, 0, 4752, 16, 0, 5696, 18724, 0, 5696, 18724, 0, 5696, 18724, 0, 5696, 
18724, 0, 5696, 18724, 0, 6336, 17, 0, 6336, 17, 0, 7376, 8738, 0, 7376, 8738, 0, 7376, 8738, 0, 7376, 8738, 0, 7392, 8738, 0, 7392, 8738, 0, 7392, 8738, 0, 7392, 8738, 0, 13008, 34, 0, 13008, 34, 0, 13024, 34, 0, 13024, 34, 0, 14544, 32, 0, 14560, 32, 0, 16016, 32, 0, 16032, 32, 0, 20544, 34952, 0, 20544, 34952, 0, 20544, 34952, 0, 20544, 34952, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1488, 18, 0, 1488, 18, 0, 4752, 16, 0, 5696, 18724, 0, 5696, 18724, 0, 5696, 18724, 0, 5696, 18724, 0, 5696, 18724, 0, 6336, 17, 0, 6336, 17, 0, 7376, 8738, 0, 7376, 8738, 0, 7376, 8738, 0, 7376, 8738, 0, 7392, 8738, 0, 7392, 8738, 0, 7392, 8738, 0, 7392, 8738, 0, 13008, 34, 0, 13008, 34, 0, 13024, 34, 0, 13024, 34, 0, 14544, 32, 0, 14560, 32, 0, 16016, 32, 0, 16032, 32, 0, 20544, 34952, 0, 20544, 34952, 0, 20544, 34952, 0, 20544, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582152987798448_882_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582152987798448_882_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..06954f36 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582152987798448_882_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,176 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 9)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4800, 21845, 0, 4800, 21845, 0, 4800, 21845, 0, 4800, 21845, 0, 4800, 21845, 0, 4800, 21845, 0, 4800, 21845, 0, 4800, 21845, 0, 4544, 32768, 0, 4800, 21845, 0, 4800, 21845, 0, 4800, 21845, 0, 4800, 21845, 0, 4800, 21845, 0, 4800, 21845, 0, 4800, 21845, 0, 4800, 21845, 0, 4544, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582153125891491_883_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582153125891491_883_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..51cc8e38 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582153125891491_883_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,104 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | 
(counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1232, 49153, 0, 1232, 49153, 0, 1232, 49153, 0, 1248, 49153, 0, 1248, 49153, 0, 1248, 49153, 0, 1264, 49153, 0, 1264, 49153, 0, 1264, 49153, 0, 3280, 8, 0, 3296, 8, 0, 3312, 8, 0, 4240, 8, 0, 4244, 8, 0, 4256, 8, 0, 4260, 8, 0, 4272, 8, 0, 4276, 8, 0, 5136, 57351, 0, 5136, 57351, 0, 5136, 57351, 0, 5136, 57351, 0, 5136, 57351, 0, 5136, 57351, 0, 5152, 57351, 0, 5152, 57351, 0, 5152, 57351, 0, 5152, 57351, 0, 5152, 57351, 0, 5152, 57351, 0, 5168, 57351, 0, 5168, 57351, 0, 5168, 57351, 0, 5168, 57351, 0, 5168, 57351, 0, 5168, 57351, 0, 1232, 49153, 0, 1232, 49153, 0, 1232, 49153, 0, 1248, 49153, 0, 1248, 49153, 0, 1248, 49153, 0, 1264, 49153, 0, 1264, 49153, 0, 1264, 49153, 0, 3280, 8, 0, 3296, 8, 0, 3312, 8, 0, 4240, 8, 0, 4244, 8, 0, 4256, 8, 0, 4260, 8, 0, 4272, 8, 0, 4276, 8, 0, 5136, 57351, 0, 5136, 57351, 0, 5136, 57351, 0, 5136, 57351, 0, 5136, 57351, 0, 5136, 57351, 0, 5152, 57351, 0, 5152, 57351, 0, 5152, 57351, 0, 5152, 57351, 0, 5152, 57351, 0, 5152, 57351, 0, 5168, 57351, 0, 5168, 57351, 0, 5168, 57351, 0, 5168, 57351, 0, 5168, 57351, 0, 5168, 57351, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 
+ Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582157298026459_885_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582157298026459_885_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6f466d01 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582157298026459_885_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,317 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + 
result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((105 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((134 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } + if ((i0 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (counter2 << 4)); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((186 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((196 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((205 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((210 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((217 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; 
+ } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((221 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((230 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 882 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2816, 34952, 0, 2816, 34952, 0, 2816, 34952, 0, 2816, 34952, 0, 5632, 21505, 0, 5632, 21505, 0, 5632, 21505, 0, 5632, 21505, 0, 5648, 21505, 0, 5648, 21505, 0, 5648, 21505, 0, 5648, 21505, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6724, 
21845, 0, 6724, 21845, 0, 6724, 21845, 0, 6724, 21845, 0, 6724, 21845, 0, 6724, 21845, 0, 6724, 21845, 0, 6724, 21845, 0, 6736, 21845, 0, 6736, 21845, 0, 6736, 21845, 0, 6736, 21845, 0, 6736, 21845, 0, 6736, 21845, 0, 6736, 21845, 0, 6736, 21845, 0, 6740, 21845, 0, 6740, 21845, 0, 6740, 21845, 0, 6740, 21845, 0, 6740, 21845, 0, 6740, 21845, 0, 6740, 21845, 0, 6740, 21845, 0, 7872, 20481, 0, 7872, 20481, 0, 7872, 20481, 0, 7876, 20481, 0, 7876, 20481, 0, 7876, 20481, 0, 7888, 20481, 0, 7888, 20481, 0, 7888, 20481, 0, 7892, 20481, 0, 7892, 20481, 0, 7892, 20481, 0, 8576, 20481, 0, 8576, 20481, 0, 8576, 20481, 0, 8580, 20481, 0, 8580, 20481, 0, 8580, 20481, 0, 8592, 20481, 0, 8592, 20481, 0, 8592, 20481, 0, 8596, 20481, 0, 8596, 20481, 0, 8596, 20481, 0, 9600, 4, 0, 10832, 21845, 0, 10832, 21845, 0, 10832, 21845, 0, 10832, 21845, 0, 10832, 21845, 0, 10832, 21845, 0, 10832, 21845, 0, 10832, 21845, 0, 10848, 21845, 0, 10848, 21845, 0, 10848, 21845, 0, 10848, 21845, 0, 10848, 21845, 0, 10848, 21845, 0, 10848, 21845, 0, 10848, 21845, 0, 10864, 21845, 0, 10864, 21845, 0, 10864, 21845, 0, 10864, 21845, 0, 10864, 21845, 0, 10864, 21845, 0, 10864, 21845, 0, 10864, 21845, 0, 12560, 17, 0, 12560, 17, 0, 12564, 17, 0, 12564, 17, 0, 12568, 17, 0, 12568, 17, 0, 12576, 17, 0, 12576, 17, 0, 12580, 17, 0, 12580, 17, 0, 12584, 17, 0, 12584, 17, 0, 12592, 17, 0, 12592, 17, 0, 12596, 17, 0, 12596, 17, 0, 12600, 17, 0, 12600, 17, 0, 13456, 17476, 0, 13456, 17476, 0, 13456, 17476, 0, 13456, 17476, 0, 13460, 17476, 0, 13460, 17476, 0, 13460, 17476, 0, 13460, 17476, 0, 13464, 17476, 0, 13464, 17476, 0, 13464, 17476, 0, 13464, 17476, 0, 13472, 17476, 0, 13472, 17476, 0, 13472, 17476, 0, 13472, 17476, 0, 13476, 17476, 0, 13476, 17476, 0, 13476, 17476, 0, 13476, 17476, 0, 13480, 17476, 0, 13480, 17476, 0, 13480, 17476, 0, 13480, 17476, 0, 13488, 17476, 0, 13488, 17476, 0, 13488, 17476, 0, 13488, 17476, 0, 13492, 17476, 0, 13492, 17476, 0, 13492, 17476, 0, 13492, 17476, 0, 13496, 17476, 0, 
13496, 17476, 0, 13496, 17476, 0, 13496, 17476, 0, 2816, 34952, 0, 2816, 34952, 0, 2816, 34952, 0, 2816, 34952, 0, 5632, 21505, 0, 5632, 21505, 0, 5632, 21505, 0, 5632, 21505, 0, 5648, 21505, 0, 5648, 21505, 0, 5648, 21505, 0, 5648, 21505, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6724, 21845, 0, 6724, 21845, 0, 6724, 21845, 0, 6724, 21845, 0, 6724, 21845, 0, 6724, 21845, 0, 6724, 21845, 0, 6724, 21845, 0, 6736, 21845, 0, 6736, 21845, 0, 6736, 21845, 0, 6736, 21845, 0, 6736, 21845, 0, 6736, 21845, 0, 6736, 21845, 0, 6736, 21845, 0, 6740, 21845, 0, 6740, 21845, 0, 6740, 21845, 0, 6740, 21845, 0, 6740, 21845, 0, 6740, 21845, 0, 6740, 21845, 0, 6740, 21845, 0, 7872, 20481, 0, 7872, 20481, 0, 7872, 20481, 0, 7876, 20481, 0, 7876, 20481, 0, 7876, 20481, 0, 7888, 20481, 0, 7888, 20481, 0, 7888, 20481, 0, 7892, 20481, 0, 7892, 20481, 0, 7892, 20481, 0, 8576, 20481, 0, 8576, 20481, 0, 8576, 20481, 0, 8580, 20481, 0, 8580, 20481, 0, 8580, 20481, 0, 8592, 20481, 0, 8592, 20481, 0, 8592, 20481, 0, 8596, 20481, 0, 8596, 20481, 0, 8596, 20481, 0, 9600, 4, 0, 10832, 21845, 0, 10832, 21845, 0, 10832, 21845, 0, 10832, 21845, 0, 10832, 21845, 0, 10832, 21845, 0, 10832, 21845, 0, 10832, 21845, 0, 10848, 21845, 0, 10848, 21845, 0, 10848, 21845, 0, 10848, 21845, 0, 10848, 21845, 0, 10848, 21845, 0, 10848, 21845, 0, 10848, 21845, 0, 10864, 21845, 0, 10864, 21845, 0, 10864, 21845, 0, 10864, 21845, 0, 10864, 21845, 0, 10864, 21845, 0, 10864, 21845, 0, 10864, 21845, 0, 12560, 17, 0, 12560, 17, 0, 12564, 17, 0, 12564, 17, 0, 12568, 17, 0, 12568, 17, 0, 12576, 17, 0, 12576, 17, 0, 12580, 17, 0, 12580, 17, 0, 12584, 17, 0, 12584, 17, 0, 12592, 17, 0, 12592, 17, 0, 12596, 17, 0, 12596, 17, 0, 12600, 17, 0, 12600, 17, 0, 13456, 17476, 0, 13456, 17476, 0, 13456, 17476, 0, 13456, 17476, 0, 13460, 17476, 0, 13460, 17476, 0, 13460, 17476, 0, 13460, 17476, 0, 13464, 17476, 0, 13464, 17476, 0, 13464, 17476, 0, 
13464, 17476, 0, 13472, 17476, 0, 13472, 17476, 0, 13472, 17476, 0, 13472, 17476, 0, 13476, 17476, 0, 13476, 17476, 0, 13476, 17476, 0, 13476, 17476, 0, 13480, 17476, 0, 13480, 17476, 0, 13480, 17476, 0, 13480, 17476, 0, 13488, 17476, 0, 13488, 17476, 0, 13488, 17476, 0, 13488, 17476, 0, 13492, 17476, 0, 13492, 17476, 0, 13492, 17476, 0, 13492, 17476, 0, 13496, 17476, 0, 13496, 17476, 0, 13496, 17476, 0, 13496, 17476, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582170198544998_887_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582170198544998_887_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..76b58a61 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582170198544998_887_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,82 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0, 1152, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582170321943833_888_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582170321943833_888_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..61105e2c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582170321943833_888_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,69 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582206016398984_891_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582206016398984_891_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7c7ff86a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582206016398984_891_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,117 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582217975725885_894_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582217975725885_894_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9df76dd0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582217975725885_894_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,222 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 4))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 330 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1168, 4369, 0, 1168, 4369, 0, 1168, 4369, 0, 1168, 4369, 0, 1184, 4369, 0, 1184, 4369, 0, 1184, 4369, 0, 1184, 4369, 0, 1200, 4369, 0, 1200, 4369, 0, 1200, 4369, 0, 1200, 4369, 0, 2836, 4113, 0, 2836, 4113, 0, 2836, 4113, 0, 2840, 4113, 0, 2840, 4113, 0, 2840, 4113, 0, 2852, 4113, 0, 2852, 4113, 0, 2852, 4113, 0, 2856, 4113, 0, 2856, 4113, 0, 2856, 4113, 0, 2868, 4113, 0, 2868, 4113, 0, 2868, 4113, 0, 2872, 4113, 0, 2872, 4113, 0, 2872, 4113, 0, 8768, 17476, 0, 8768, 17476, 0, 8768, 17476, 0, 8768, 17476, 0, 11008, 17476, 0, 11008, 17476, 0, 11008, 17476, 0, 11008, 17476, 0, 11456, 34952, 0, 11456, 34952, 0, 11456, 34952, 0, 11456, 34952, 0, 13440, 64512, 0, 13440, 64512, 0, 13440, 64512, 0, 13440, 64512, 0, 13440, 64512, 0, 13440, 64512, 0, 13056, 15, 0, 13056, 15, 0, 13056, 15, 0, 13056, 15, 0, 12672, 336, 0, 12672, 336, 0, 12672, 336, 0, 1168, 4369, 0, 1168, 4369, 0, 1168, 4369, 0, 1168, 4369, 0, 1184, 4369, 0, 1184, 4369, 0, 1184, 4369, 0, 1184, 4369, 0, 1200, 4369, 0, 1200, 4369, 0, 1200, 4369, 0, 1200, 4369, 0, 2836, 4113, 0, 2836, 4113, 0, 2836, 4113, 0, 2840, 4113, 0, 2840, 4113, 0, 2840, 4113, 0, 2852, 4113, 0, 2852, 4113, 0, 2852, 4113, 0, 2856, 4113, 0, 2856, 4113, 0, 2856, 4113, 0, 2868, 4113, 0, 2868, 4113, 0, 2868, 4113, 0, 2872, 4113, 0, 2872, 4113, 0, 2872, 4113, 0, 8768, 17476, 0, 8768, 17476, 0, 8768, 17476, 0, 8768, 17476, 0, 
11008, 17476, 0, 11008, 17476, 0, 11008, 17476, 0, 11008, 17476, 0, 11456, 34952, 0, 11456, 34952, 0, 11456, 34952, 0, 11456, 34952, 0, 13440, 64512, 0, 13440, 64512, 0, 13440, 64512, 0, 13440, 64512, 0, 13440, 64512, 0, 13440, 64512, 0, 13056, 15, 0, 13056, 15, 0, 13056, 15, 0, 13056, 15, 0, 12672, 336, 0, 12672, 336, 0, 12672, 336, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582220070265922_895_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582220070265922_895_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c271a61a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582220070265922_895_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,444 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 14)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (counter1 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((274 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((288 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = ((295 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (329 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (338 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (348 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { 
+ result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (355 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (366 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (375 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((392 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((402 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 228 + - Name: expected_bit_patterns + 
Format: UInt32 + Stride: 4 + Data: [4416, 8194, 0, 4416, 8194, 0, 5952, 17476, 0, 5952, 17476, 0, 5952, 17476, 0, 5952, 17476, 0, 8960, 128, 0, 10624, 73, 0, 10624, 73, 0, 10624, 73, 0, 12048, 16, 0, 12064, 16, 0, 14288, 9216, 0, 14288, 9216, 0, 14304, 9216, 0, 14304, 9216, 0, 15248, 2, 0, 15264, 2, 0, 16464, 128, 0, 16480, 128, 0, 17552, 18432, 0, 17552, 18432, 0, 18896, 18432, 0, 18896, 18432, 0, 23424, 33288, 0, 23424, 33288, 0, 23424, 33288, 0, 24000, 1040, 0, 24000, 1040, 0, 25088, 16644, 0, 25088, 16644, 0, 25088, 16644, 0, 25104, 16644, 0, 25104, 16644, 0, 25104, 16644, 0, 25120, 16644, 0, 25120, 16644, 0, 25120, 16644, 0, 4416, 8194, 0, 4416, 8194, 0, 5952, 17476, 0, 5952, 17476, 0, 5952, 17476, 0, 5952, 17476, 0, 8960, 128, 0, 10624, 73, 0, 10624, 73, 0, 10624, 73, 0, 12048, 16, 0, 12064, 16, 0, 14288, 9216, 0, 14288, 9216, 0, 14304, 9216, 0, 14304, 9216, 0, 15248, 2, 0, 15264, 2, 0, 16464, 128, 0, 16480, 128, 0, 17552, 18432, 0, 17552, 18432, 0, 18896, 18432, 0, 18896, 18432, 0, 23424, 33288, 0, 23424, 33288, 0, 23424, 33288, 0, 24000, 1040, 0, 24000, 1040, 0, 25088, 16644, 0, 25088, 16644, 0, 25088, 16644, 0, 25104, 16644, 0, 25104, 16644, 0, 25104, 16644, 0, 25120, 16644, 0, 25120, 16644, 0, 25120, 16644, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582234768116335_897_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582234768116335_897_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a45da477 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582234768116335_897_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,172 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 15))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 5888, 9216, 0, 5888, 9216, 0, 7616, 18724, 0, 7616, 18724, 0, 7616, 18724, 0, 7616, 18724, 0, 7616, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 5888, 9216, 0, 5888, 9216, 0, 7616, 18724, 0, 7616, 18724, 0, 7616, 18724, 0, 7616, 18724, 0, 7616, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582234999811142_898_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582234999811142_898_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87cd25f4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582234999811142_898_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582235118350776_899_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582235118350776_899_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4a6c1e75 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582235118350776_899_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,117 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0, 1920, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582235266782276_900_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582235266782276_900_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1cf19b30 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582235266782276_900_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,124 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1792, 1040, 0, 1792, 1040, 0, 2432, 18724, 0, 2432, 18724, 0, 2432, 18724, 0, 2432, 18724, 0, 2432, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1792, 1040, 0, 1792, 1040, 0, 2432, 18724, 0, 2432, 18724, 0, 2432, 18724, 0, 2432, 18724, 0, 2432, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582235429949769_901_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582235429949769_901_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c0487167 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582235429949769_901_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,141 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2960, 4, 0, 6800, 21841, 0, 6800, 21841, 0, 6800, 21841, 0, 6800, 
21841, 0, 6800, 21841, 0, 6800, 21841, 0, 6800, 21841, 0, 6816, 21841, 0, 6816, 21841, 0, 6816, 21841, 0, 6816, 21841, 0, 6816, 21841, 0, 6816, 21841, 0, 6816, 21841, 0, 7360, 21845, 0, 7360, 21845, 0, 7360, 21845, 0, 7360, 21845, 0, 7360, 21845, 0, 7360, 21845, 0, 7360, 21845, 0, 7360, 21845, 0, 2960, 4, 0, 6800, 21841, 0, 6800, 21841, 0, 6800, 21841, 0, 6800, 21841, 0, 6800, 21841, 0, 6800, 21841, 0, 6800, 21841, 0, 6816, 21841, 0, 6816, 21841, 0, 6816, 21841, 0, 6816, 21841, 0, 6816, 21841, 0, 6816, 21841, 0, 6816, 21841, 0, 7360, 21845, 0, 7360, 21845, 0, 7360, 21845, 0, 7360, 21845, 0, 7360, 21845, 0, 7360, 21845, 0, 7360, 21845, 0, 7360, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582256466980387_904_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582256466980387_904_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f04a9f24 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582256466980387_904_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,439 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 14))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((297 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((308 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (329 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (336 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (345 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 3) || 
(WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((370 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((387 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((405 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (417 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (427 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { 
+ result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (436 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (441 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((460 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((476 << 6) | (i6 << 4)) | (counter7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 3408, 16384, 0, 5376, 256, 0, 10432, 1, 0, 13840, 1, 0, 13856, 1, 0, 13872, 1, 0, 14272, 1, 0, 14848, 1040, 0, 14848, 1040, 0, 15168, 18724, 0, 15168, 18724, 0, 15168, 
18724, 0, 15168, 18724, 0, 15168, 18724, 0, 21504, 16, 0, 27328, 64, 0, 27904, 1024, 0, 28224, 16388, 0, 28224, 16388, 0, 29440, 34824, 0, 29440, 34824, 0, 29440, 34824, 0, 29456, 34824, 0, 29456, 34824, 0, 29456, 34824, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 3408, 16384, 0, 5376, 256, 0, 10432, 1, 0, 13840, 1, 0, 13856, 1, 0, 13872, 1, 0, 14272, 1, 0, 14848, 1040, 0, 14848, 1040, 0, 15168, 18724, 0, 15168, 18724, 0, 15168, 18724, 0, 15168, 18724, 0, 15168, 18724, 0, 21504, 16, 0, 27328, 64, 0, 27904, 1024, 0, 28224, 16388, 0, 28224, 16388, 0, 29440, 34824, 0, 29440, 34824, 0, 29440, 34824, 0, 29456, 34824, 0, 29456, 34824, 0, 29456, 34824, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582270813499308_905_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582270813499308_905_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f77abdc4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582270813499308_905_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,278 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 
8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 5) || 
(WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((157 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1920, 8192, 0, 1936, 8192, 0, 6336, 32, 0, 6352, 32, 0, 7424, 8192, 0, 7440, 8192, 0, 10068, 16388, 0, 10068, 16388, 0, 10072, 16388, 0, 10072, 16388, 0, 10076, 16388, 0, 10076, 16388, 0, 10084, 16388, 0, 10084, 16388, 0, 10088, 16388, 0, 10088, 
16388, 0, 10092, 16388, 0, 10092, 16388, 0, 10768, 16388, 0, 10768, 16388, 0, 10784, 16388, 0, 10784, 16388, 0, 11648, 16388, 0, 11648, 16388, 0, 12096, 34952, 0, 12096, 34952, 0, 12096, 34952, 0, 12096, 34952, 0, 576, 17, 0, 576, 17, 0, 1920, 8192, 0, 1936, 8192, 0, 6336, 32, 0, 6352, 32, 0, 7424, 8192, 0, 7440, 8192, 0, 10068, 16388, 0, 10068, 16388, 0, 10072, 16388, 0, 10072, 16388, 0, 10076, 16388, 0, 10076, 16388, 0, 10084, 16388, 0, 10084, 16388, 0, 10088, 16388, 0, 10088, 16388, 0, 10092, 16388, 0, 10092, 16388, 0, 10768, 16388, 0, 10768, 16388, 0, 10784, 16388, 0, 10784, 16388, 0, 11648, 16388, 0, 11648, 16388, 0, 12096, 34952, 0, 12096, 34952, 0, 12096, 34952, 0, 12096, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582271558323383_906_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582271558323383_906_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2b74b6c7 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582271558323383_906_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,319 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((128 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((138 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((147 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((152 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((156 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((((WaveGetLaneIndex() == 
0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 11))) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 9)) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((249 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((258 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4096, 34952, 0, 4096, 34952, 0, 4096, 34952, 0, 4096, 34952, 0, 4736, 17, 0, 4736, 17, 0, 6464, 32, 0, 6480, 32, 0, 6496, 32, 0, 8192, 8192, 0, 8196, 8192, 0, 8208, 8192, 0, 8212, 8192, 0, 8224, 8192, 0, 8228, 8192, 0, 9728, 32, 0, 9732, 32, 0, 9744, 32, 0, 9748, 32, 0, 9760, 32, 0, 9764, 32, 0, 12288, 16384, 0, 12736, 34952, 0, 12736, 34952, 0, 12736, 34952, 0, 12736, 34952, 0, 15936, 1, 0, 15952, 1, 0, 15968, 1, 0, 576, 17, 0, 576, 17, 0, 4096, 34952, 0, 4096, 34952, 0, 4096, 34952, 0, 4096, 34952, 0, 4736, 17, 0, 4736, 17, 0, 6464, 32, 0, 6480, 32, 0, 6496, 32, 0, 8192, 8192, 0, 8196, 8192, 0, 8208, 8192, 0, 8212, 8192, 0, 8224, 8192, 0, 
8228, 8192, 0, 9728, 32, 0, 9732, 32, 0, 9744, 32, 0, 9748, 32, 0, 9760, 32, 0, 9764, 32, 0, 12288, 16384, 0, 12736, 34952, 0, 12736, 34952, 0, 12736, 34952, 0, 12736, 34952, 0, 15936, 1, 0, 15952, 1, 0, 15968, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582277735844784_908_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582277735844784_908_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2eac5cae --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582277735844784_908_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,114 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + 
result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1856, 32775, 0, 1856, 32775, 0, 1856, 32775, 0, 1856, 32775, 0, 2496, 7, 0, 2496, 7, 0, 2496, 7, 0, 3200, 32769, 0, 3200, 32769, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1856, 32775, 0, 1856, 32775, 0, 1856, 32775, 0, 1856, 32775, 0, 2496, 7, 0, 2496, 7, 0, 2496, 7, 0, 3200, 32769, 0, 3200, 32769, 0] + - Name: _wave_op_index + Format: UInt32 + 
Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582282494151800_910_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582282494151800_910_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e85ed152 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582282494151800_910_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,191 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + 
if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((102 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 1344, 16, 0, 3904, 256, 0, 5328, 8224, 0, 5328, 8224, 0, 5344, 8224, 0, 5344, 8224, 0, 6544, 8192, 0, 6548, 8192, 0, 6560, 8192, 0, 6564, 8192, 0, 7552, 17476, 0, 7552, 17476, 0, 7552, 17476, 0, 7552, 17476, 0, 8000, 34952, 0, 8000, 34952, 0, 8000, 34952, 0, 8000, 34952, 0, 768, 1, 0, 1344, 16, 0, 3904, 256, 0, 5328, 
8224, 0, 5328, 8224, 0, 5344, 8224, 0, 5344, 8224, 0, 6544, 8192, 0, 6548, 8192, 0, 6560, 8192, 0, 6564, 8192, 0, 7552, 17476, 0, 7552, 17476, 0, 7552, 17476, 0, 7552, 17476, 0, 8000, 34952, 0, 8000, 34952, 0, 8000, 34952, 0, 8000, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582283748354847_912_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582283748354847_912_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fd708dc3 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582283748354847_912_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,170 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() 
== 13)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((114 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 492 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 63491, 0, 1088, 63491, 0, 1088, 63491, 0, 1088, 63491, 0, 1088, 63491, 0, 1088, 63491, 0, 1088, 63491, 0, 2176, 8192, 0, 2192, 8192, 0, 2208, 8192, 0, 3072, 20485, 0, 3072, 20485, 0, 3072, 20485, 0, 
3072, 20485, 0, 3088, 20485, 0, 3088, 20485, 0, 3088, 20485, 0, 3088, 20485, 0, 3104, 20485, 0, 3104, 20485, 0, 3104, 20485, 0, 3104, 20485, 0, 3712, 5, 0, 3712, 5, 0, 3728, 5, 0, 3728, 5, 0, 3744, 5, 0, 3744, 5, 0, 6592, 32778, 0, 6592, 32778, 0, 6592, 32778, 0, 6596, 32778, 0, 6596, 32778, 0, 6596, 32778, 0, 6600, 32778, 0, 6600, 32778, 0, 6600, 32778, 0, 6608, 32778, 0, 6608, 32778, 0, 6608, 32778, 0, 6612, 32778, 0, 6612, 32778, 0, 6612, 32778, 0, 6616, 32778, 0, 6616, 32778, 0, 6616, 32778, 0, 6624, 32778, 0, 6624, 32778, 0, 6624, 32778, 0, 6628, 32778, 0, 6628, 32778, 0, 6628, 32778, 0, 6632, 32778, 0, 6632, 32778, 0, 6632, 32778, 0, 7296, 40962, 0, 7296, 40962, 0, 7296, 40962, 0, 7300, 40962, 0, 7300, 40962, 0, 7300, 40962, 0, 7304, 40962, 0, 7304, 40962, 0, 7304, 40962, 0, 7312, 40962, 0, 7312, 40962, 0, 7312, 40962, 0, 7316, 40962, 0, 7316, 40962, 0, 7316, 40962, 0, 7320, 40962, 0, 7320, 40962, 0, 7320, 40962, 0, 7328, 40962, 0, 7328, 40962, 0, 7328, 40962, 0, 7332, 40962, 0, 7332, 40962, 0, 7332, 40962, 0, 7336, 40962, 0, 7336, 40962, 0, 7336, 40962, 0, 1088, 63491, 0, 1088, 63491, 0, 1088, 63491, 0, 1088, 63491, 0, 1088, 63491, 0, 1088, 63491, 0, 1088, 63491, 0, 2176, 8192, 0, 2192, 8192, 0, 2208, 8192, 0, 3072, 20485, 0, 3072, 20485, 0, 3072, 20485, 0, 3072, 20485, 0, 3088, 20485, 0, 3088, 20485, 0, 3088, 20485, 0, 3088, 20485, 0, 3104, 20485, 0, 3104, 20485, 0, 3104, 20485, 0, 3104, 20485, 0, 3712, 5, 0, 3712, 5, 0, 3728, 5, 0, 3728, 5, 0, 3744, 5, 0, 3744, 5, 0, 6592, 32778, 0, 6592, 32778, 0, 6592, 32778, 0, 6596, 32778, 0, 6596, 32778, 0, 6596, 32778, 0, 6600, 32778, 0, 6600, 32778, 0, 6600, 32778, 0, 6608, 32778, 0, 6608, 32778, 0, 6608, 32778, 0, 6612, 32778, 0, 6612, 32778, 0, 6612, 32778, 0, 6616, 32778, 0, 6616, 32778, 0, 6616, 32778, 0, 6624, 32778, 0, 6624, 32778, 0, 6624, 32778, 0, 6628, 32778, 0, 6628, 32778, 0, 6628, 32778, 0, 6632, 32778, 0, 6632, 32778, 0, 6632, 32778, 0, 7296, 40962, 0, 7296, 40962, 0, 7296, 40962, 0, 7300, 40962, 0, 
7300, 40962, 0, 7300, 40962, 0, 7304, 40962, 0, 7304, 40962, 0, 7304, 40962, 0, 7312, 40962, 0, 7312, 40962, 0, 7312, 40962, 0, 7316, 40962, 0, 7316, 40962, 0, 7316, 40962, 0, 7320, 40962, 0, 7320, 40962, 0, 7320, 40962, 0, 7328, 40962, 0, 7328, 40962, 0, 7328, 40962, 0, 7332, 40962, 0, 7332, 40962, 0, 7332, 40962, 0, 7336, 40962, 0, 7336, 40962, 0, 7336, 40962, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582290701499457_913_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582290701499457_913_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a8f07eb0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582290701499457_913_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,354 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((39 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((83 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((140 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((149 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + continue; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() >= 8)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); 
i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((245 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((310 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 270 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 4097, 0, 1344, 4097, 0, 1360, 4097, 0, 1360, 4097, 0, 2500, 1, 0, 2504, 1, 0, 2508, 1, 0, 2516, 1, 0, 2520, 1, 0, 2524, 1, 0, 3140, 17, 0, 3140, 17, 0, 3144, 17, 0, 3144, 17, 0, 3148, 17, 0, 3148, 17, 0, 3156, 17, 0, 3156, 17, 0, 3160, 17, 0, 3160, 17, 0, 3164, 17, 0, 3164, 17, 0, 5316, 4097, 0, 5316, 4097, 0, 5320, 4097, 0, 5320, 4097, 0, 5324, 4097, 0, 5324, 4097, 0, 
5332, 4097, 0, 5332, 4097, 0, 5336, 4097, 0, 5336, 4097, 0, 5340, 4097, 0, 5340, 4097, 0, 11648, 8194, 0, 11648, 8194, 0, 12480, 8194, 0, 12480, 8194, 0, 12800, 17476, 0, 12800, 17476, 0, 12800, 17476, 0, 12800, 17476, 0, 13440, 34816, 0, 13440, 34816, 0, 17344, 8, 0, 1344, 4097, 0, 1344, 4097, 0, 1360, 4097, 0, 1360, 4097, 0, 2500, 1, 0, 2504, 1, 0, 2508, 1, 0, 2516, 1, 0, 2520, 1, 0, 2524, 1, 0, 3140, 17, 0, 3140, 17, 0, 3144, 17, 0, 3144, 17, 0, 3148, 17, 0, 3148, 17, 0, 3156, 17, 0, 3156, 17, 0, 3160, 17, 0, 3160, 17, 0, 3164, 17, 0, 3164, 17, 0, 5316, 4097, 0, 5316, 4097, 0, 5320, 4097, 0, 5320, 4097, 0, 5324, 4097, 0, 5324, 4097, 0, 5332, 4097, 0, 5332, 4097, 0, 5336, 4097, 0, 5336, 4097, 0, 5340, 4097, 0, 5340, 4097, 0, 11648, 8194, 0, 11648, 8194, 0, 12480, 8194, 0, 12480, 8194, 0, 12800, 17476, 0, 12800, 17476, 0, 12800, 17476, 0, 12800, 17476, 0, 13440, 34816, 0, 13440, 34816, 0, 17344, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582302132554110_914_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582302132554110_914_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..05eda339 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582302132554110_914_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,141 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((93 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((104 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + 
Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2832, 64527, 0, 2832, 64527, 0, 2832, 64527, 0, 2832, 64527, 0, 2832, 64527, 0, 2832, 64527, 0, 2832, 64527, 0, 2832, 64527, 0, 2832, 64527, 0, 2832, 64527, 0, 2848, 64527, 0, 2848, 64527, 0, 2848, 64527, 0, 2848, 64527, 0, 2848, 64527, 0, 2848, 64527, 0, 2848, 64527, 0, 2848, 64527, 0, 2848, 64527, 0, 2848, 64527, 0, 6672, 2048, 0, 6676, 2048, 0, 6680, 2048, 0, 6688, 2048, 0, 6692, 2048, 0, 6696, 2048, 0, 7888, 128, 0, 7904, 128, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2832, 64527, 0, 2832, 64527, 0, 2832, 64527, 0, 2832, 64527, 0, 2832, 64527, 0, 2832, 64527, 0, 2832, 64527, 0, 2832, 64527, 0, 2832, 64527, 0, 2832, 64527, 0, 2848, 64527, 0, 2848, 64527, 0, 2848, 64527, 0, 2848, 64527, 0, 2848, 64527, 0, 2848, 64527, 0, 2848, 64527, 0, 2848, 64527, 0, 2848, 64527, 0, 2848, 64527, 0, 6672, 2048, 0, 6676, 2048, 0, 6680, 2048, 0, 6688, 2048, 0, 6692, 2048, 0, 6696, 2048, 0, 7888, 128, 0, 7904, 128, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582320189856230_916_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582320189856230_916_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7707ecf5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582320189856230_916_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,70 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1616, 8728, 0, 1616, 8728, 0, 1616, 8728, 0, 1616, 8728, 0, 1632, 8728, 0, 1632, 8728, 0, 1632, 8728, 0, 1632, 8728, 0, 1616, 8728, 0, 1616, 8728, 0, 1616, 8728, 0, 1616, 8728, 0, 1632, 8728, 0, 1632, 8728, 0, 1632, 8728, 0, 1632, 8728, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582320334843880_917_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582320334843880_917_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..801b895a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582320334843880_917_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,165 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 11)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1104, 32775, 0, 1104, 32775, 0, 1104, 32775, 0, 1104, 32775, 0, 1120, 32775, 0, 1120, 32775, 0, 1120, 32775, 0, 1120, 32775, 0, 1744, 31, 0, 1744, 31, 0, 1744, 31, 0, 1744, 31, 0, 1744, 31, 0, 1760, 31, 0, 1760, 31, 0, 1760, 31, 0, 1760, 31, 0, 1760, 31, 0, 2448, 61447, 0, 2448, 61447, 0, 2448, 61447, 0, 2448, 61447, 0, 2448, 61447, 0, 2448, 61447, 0, 2448, 61447, 0, 2464, 61447, 0, 2464, 61447, 0, 2464, 61447, 0, 2464, 61447, 0, 2464, 61447, 0, 2464, 61447, 0, 2464, 61447, 0, 1104, 32775, 0, 1104, 32775, 0, 1104, 32775, 0, 1104, 32775, 0, 1120, 32775, 0, 1120, 32775, 0, 1120, 32775, 0, 1120, 32775, 0, 1744, 31, 0, 1744, 31, 0, 1744, 31, 0, 1744, 31, 0, 1744, 31, 0, 1760, 31, 0, 1760, 31, 0, 1760, 31, 0, 1760, 31, 0, 1760, 31, 0, 2448, 61447, 0, 2448, 61447, 0, 2448, 61447, 0, 2448, 61447, 0, 2448, 61447, 0, 2448, 61447, 0, 2448, 61447, 0, 2464, 61447, 0, 2464, 61447, 0, 2464, 61447, 0, 2464, 61447, 0, 2464, 61447, 0, 2464, 61447, 0, 2464, 61447, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582320949847008_918_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582320949847008_918_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3bf2f62f --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582320949847008_918_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,281 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 8)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9))) { + 
result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 
15))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((279 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((293 << 6) | 
(counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((302 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((309 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 16960, 8192, 0, 17872, 16384, 0, 17888, 16384, 0, 18768, 16644, 0, 18768, 16644, 0, 18768, 16644, 0, 18784, 16644, 0, 18784, 16644, 0, 18784, 16644, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 16960, 8192, 0, 17872, 16384, 0, 17888, 16384, 0, 18768, 16644, 0, 18768, 16644, 0, 18768, 16644, 0, 18784, 16644, 0, 18784, 16644, 0, 18784, 16644, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + 
VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582321291949350_919_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582321291949350_919_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..654916f7 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582321291949350_919_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,245 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((167 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((177 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((186 << 6) | 
(i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((191 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 2)) { + break; + } + } + if ((i2 == 1)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 492 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1424, 1, 0, 1440, 1, 0, 1456, 1, 0, 3904, 26214, 0, 3904, 26214, 0, 3904, 26214, 0, 3904, 26214, 0, 3904, 26214, 0, 3904, 26214, 0, 3904, 26214, 0, 3904, 26214, 0, 4352, 34952, 0, 4352, 34952, 0, 4352, 34952, 0, 4352, 34952, 0, 6656, 8224, 0, 6656, 8224, 0, 7552, 8224, 0, 7552, 8224, 0, 8640, 8192, 0, 8656, 8192, 0, 8672, 8192, 0, 10688, 2048, 0, 10692, 2048, 0, 10696, 2048, 0, 10704, 2048, 0, 10708, 2048, 0, 10712, 2048, 0, 11328, 73, 0, 11328, 73, 0, 11328, 73, 0, 11332, 73, 0, 11332, 73, 0, 11332, 73, 0, 11336, 73, 0, 11336, 73, 0, 11336, 73, 0, 11344, 73, 0, 11344, 73, 0, 11344, 73, 0, 11348, 73, 0, 11348, 73, 0, 11348, 73, 0, 11352, 73, 0, 11352, 73, 0, 11352, 73, 0, 11904, 1040, 0, 11904, 1040, 0, 11908, 1040, 0, 11908, 1040, 0, 11912, 1040, 0, 11912, 1040, 0, 11920, 1040, 0, 11920, 1040, 0, 11924, 1040, 0, 11924, 1040, 0, 11928, 1040, 0, 11928, 1040, 0, 12224, 18692, 0, 12224, 18692, 0, 12224, 18692, 0, 12224, 18692, 0, 12228, 18692, 0, 12228, 18692, 0, 12228, 18692, 0, 12228, 18692, 0, 12232, 18692, 0, 12232, 18692, 0, 12232, 18692, 0, 12232, 18692, 0, 12240, 18692, 
0, 12240, 18692, 0, 12240, 18692, 0, 12240, 18692, 0, 12244, 18692, 0, 12244, 18692, 0, 12244, 18692, 0, 12244, 18692, 0, 12248, 18692, 0, 12248, 18692, 0, 12248, 18692, 0, 12248, 18692, 0, 1424, 1, 0, 1440, 1, 0, 1456, 1, 0, 3904, 26214, 0, 3904, 26214, 0, 3904, 26214, 0, 3904, 26214, 0, 3904, 26214, 0, 3904, 26214, 0, 3904, 26214, 0, 3904, 26214, 0, 4352, 34952, 0, 4352, 34952, 0, 4352, 34952, 0, 4352, 34952, 0, 6656, 8224, 0, 6656, 8224, 0, 7552, 8224, 0, 7552, 8224, 0, 8640, 8192, 0, 8656, 8192, 0, 8672, 8192, 0, 10688, 2048, 0, 10692, 2048, 0, 10696, 2048, 0, 10704, 2048, 0, 10708, 2048, 0, 10712, 2048, 0, 11328, 73, 0, 11328, 73, 0, 11328, 73, 0, 11332, 73, 0, 11332, 73, 0, 11332, 73, 0, 11336, 73, 0, 11336, 73, 0, 11336, 73, 0, 11344, 73, 0, 11344, 73, 0, 11344, 73, 0, 11348, 73, 0, 11348, 73, 0, 11348, 73, 0, 11352, 73, 0, 11352, 73, 0, 11352, 73, 0, 11904, 1040, 0, 11904, 1040, 0, 11908, 1040, 0, 11908, 1040, 0, 11912, 1040, 0, 11912, 1040, 0, 11920, 1040, 0, 11920, 1040, 0, 11924, 1040, 0, 11924, 1040, 0, 11928, 1040, 0, 11928, 1040, 0, 12224, 18692, 0, 12224, 18692, 0, 12224, 18692, 0, 12224, 18692, 0, 12228, 18692, 0, 12228, 18692, 0, 12228, 18692, 0, 12228, 18692, 0, 12232, 18692, 0, 12232, 18692, 0, 12232, 18692, 0, 12232, 18692, 0, 12240, 18692, 0, 12240, 18692, 0, 12240, 18692, 0, 12240, 18692, 0, 12244, 18692, 0, 12244, 18692, 0, 12244, 18692, 0, 12244, 18692, 0, 12248, 18692, 0, 12248, 18692, 0, 12248, 18692, 0, 12248, 18692, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582363510434507_921_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582363510434507_921_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f0f736e0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582363510434507_921_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,468 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter0 = 0; + while 
((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 14))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((247 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((279 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 5))) { + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (332 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (337 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (344 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (348 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (357 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5376, 32768, 0, 7888, 32768, 0, 7904, 32768, 0, 10816, 2, 0, 13184, 16, 0, 13504, 18724, 0, 13504, 18724, 0, 13504, 18724, 0, 13504, 18724, 0, 13504, 18724, 0, 14336, 73, 0, 14336, 73, 0, 14336, 73, 0, 18432, 1040, 0, 18432, 1040, 0, 18752, 18724, 0, 18752, 18724, 0, 18752, 18724, 0, 18752, 18724, 0, 18752, 18724, 0, 5376, 32768, 0, 7888, 32768, 0, 7904, 32768, 0, 10816, 2, 0, 13184, 16, 0, 13504, 18724, 0, 13504, 18724, 0, 13504, 18724, 0, 13504, 18724, 0, 13504, 18724, 0, 14336, 73, 0, 14336, 73, 0, 14336, 73, 0, 18432, 1040, 0, 18432, 1040, 0, 18752, 18724, 0, 18752, 18724, 0, 18752, 18724, 0, 18752, 18724, 0, 18752, 
18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582364323464137_922_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582364323464137_922_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..707471a0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582364323464137_922_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,276 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if 
((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 0))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((155 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1792, 63, 0, 1792, 63, 0, 1792, 63, 0, 1792, 63, 0, 1792, 63, 0, 1792, 63, 0, 2432, 73, 0, 2432, 73, 0, 2432, 73, 0, 4928, 36, 0, 4928, 36, 0, 5824, 85, 0, 5824, 85, 0, 5824, 85, 0, 5824, 85, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1792, 63, 0, 1792, 63, 0, 1792, 63, 0, 1792, 63, 0, 1792, 63, 0, 1792, 63, 0, 2432, 73, 0, 2432, 73, 0, 2432, 73, 0, 4928, 36, 0, 4928, 36, 0, 5824, 85, 0, 5824, 85, 0, 5824, 85, 0, 5824, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582365258633108_923_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582365258633108_923_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ee9d0687 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582365258633108_923_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,453 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 8))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((68 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((91 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i0 == 1)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 12)) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (((157 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 2)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 2))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (325 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (348 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (359 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (364 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (374 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((396 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((407 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (417 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (426 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (431 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (438 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (445 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (449 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 336 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [960, 43690, 0, 960, 43690, 0, 960, 43690, 0, 960, 43690, 0, 960, 43690, 0, 960, 43690, 0, 960, 43690, 0, 960, 43690, 0, 1920, 32768, 0, 1936, 32768, 0, 4352, 2056, 0, 4352, 2056, 0, 4356, 2056, 0, 4356, 2056, 0, 4368, 2056, 0, 4368, 2056, 0, 4372, 2056, 0, 4372, 2056, 0, 5824, 2048, 0, 5828, 2048, 0, 5840, 2048, 0, 5844, 2048, 0, 8128, 4096, 0, 8144, 4096, 0, 9344, 20480, 0, 9344, 20480, 0, 9348, 
20480, 0, 9348, 20480, 0, 9360, 20480, 0, 9360, 20480, 0, 9364, 20480, 0, 9364, 20480, 0, 12096, 17, 0, 12096, 17, 0, 14912, 32, 0, 16512, 34952, 0, 16512, 34952, 0, 16512, 34952, 0, 16512, 34952, 0, 17152, 17, 0, 17152, 17, 0, 17792, 1, 0, 18368, 16, 0, 20800, 256, 0, 22272, 256, 0, 23296, 17476, 0, 23296, 17476, 0, 23296, 17476, 0, 23296, 17476, 0, 27584, 17472, 0, 27584, 17472, 0, 27584, 17472, 0, 28032, 34952, 0, 28032, 34952, 0, 28032, 34952, 0, 28032, 34952, 0, 960, 43690, 0, 960, 43690, 0, 960, 43690, 0, 960, 43690, 0, 960, 43690, 0, 960, 43690, 0, 960, 43690, 0, 960, 43690, 0, 1920, 32768, 0, 1936, 32768, 0, 4352, 2056, 0, 4352, 2056, 0, 4356, 2056, 0, 4356, 2056, 0, 4368, 2056, 0, 4368, 2056, 0, 4372, 2056, 0, 4372, 2056, 0, 5824, 2048, 0, 5828, 2048, 0, 5840, 2048, 0, 5844, 2048, 0, 8128, 4096, 0, 8144, 4096, 0, 9344, 20480, 0, 9344, 20480, 0, 9348, 20480, 0, 9348, 20480, 0, 9360, 20480, 0, 9360, 20480, 0, 9364, 20480, 0, 9364, 20480, 0, 12096, 17, 0, 12096, 17, 0, 14912, 32, 0, 16512, 34952, 0, 16512, 34952, 0, 16512, 34952, 0, 16512, 34952, 0, 17152, 17, 0, 17152, 17, 0, 17792, 1, 0, 18368, 16, 0, 20800, 256, 0, 22272, 256, 0, 23296, 17476, 0, 23296, 17476, 0, 23296, 17476, 0, 23296, 17476, 0, 27584, 17472, 0, 27584, 17472, 0, 27584, 17472, 0, 28032, 34952, 0, 28032, 34952, 0, 28032, 34952, 0, 28032, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582388311320935_924_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582388311320935_924_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87cd25f4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582388311320935_924_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582392972442565_926_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582392972442565_926_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8df6f6e2 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582392972442565_926_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,126 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((34 << 6) | (i0 << 4)) | (counter1 << 2)) | i2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((61 << 6) | (i0 << 4)) | (counter1 << 2)) | counter3); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 348 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2180, 4161, 0, 2180, 4161, 0, 2180, 4161, 0, 2181, 4161, 0, 2181, 4161, 0, 2181, 4161, 0, 2184, 4161, 0, 2184, 4161, 0, 2184, 4161, 0, 2185, 4161, 0, 2185, 4161, 0, 2185, 4161, 0, 2196, 4161, 0, 2196, 4161, 0, 2196, 4161, 0, 2197, 4161, 0, 2197, 4161, 0, 2197, 4161, 0, 2200, 4161, 0, 2200, 4161, 0, 2200, 4161, 0, 2201, 4161, 0, 2201, 4161, 0, 2201, 4161, 0, 2756, 1040, 0, 2756, 1040, 0, 2760, 1040, 0, 2760, 1040, 0, 
2772, 1040, 0, 2772, 1040, 0, 2776, 1040, 0, 2776, 1040, 0, 3909, 18436, 0, 3909, 18436, 0, 3909, 18436, 0, 3910, 18436, 0, 3910, 18436, 0, 3910, 18436, 0, 3913, 18436, 0, 3913, 18436, 0, 3913, 18436, 0, 3914, 18436, 0, 3914, 18436, 0, 3914, 18436, 0, 3925, 18436, 0, 3925, 18436, 0, 3925, 18436, 0, 3926, 18436, 0, 3926, 18436, 0, 3926, 18436, 0, 3929, 18436, 0, 3929, 18436, 0, 3929, 18436, 0, 3930, 18436, 0, 3930, 18436, 0, 3930, 18436, 0, 4608, 1, 0, 4624, 1, 0, 2180, 4161, 0, 2180, 4161, 0, 2180, 4161, 0, 2181, 4161, 0, 2181, 4161, 0, 2181, 4161, 0, 2184, 4161, 0, 2184, 4161, 0, 2184, 4161, 0, 2185, 4161, 0, 2185, 4161, 0, 2185, 4161, 0, 2196, 4161, 0, 2196, 4161, 0, 2196, 4161, 0, 2197, 4161, 0, 2197, 4161, 0, 2197, 4161, 0, 2200, 4161, 0, 2200, 4161, 0, 2200, 4161, 0, 2201, 4161, 0, 2201, 4161, 0, 2201, 4161, 0, 2756, 1040, 0, 2756, 1040, 0, 2760, 1040, 0, 2760, 1040, 0, 2772, 1040, 0, 2772, 1040, 0, 2776, 1040, 0, 2776, 1040, 0, 3909, 18436, 0, 3909, 18436, 0, 3909, 18436, 0, 3910, 18436, 0, 3910, 18436, 0, 3910, 18436, 0, 3913, 18436, 0, 3913, 18436, 0, 3913, 18436, 0, 3914, 18436, 0, 3914, 18436, 0, 3914, 18436, 0, 3925, 18436, 0, 3925, 18436, 0, 3925, 18436, 0, 3926, 18436, 0, 3926, 18436, 0, 3926, 18436, 0, 3929, 18436, 0, 3929, 18436, 0, 3929, 18436, 0, 3930, 18436, 0, 3930, 18436, 0, 3930, 18436, 0, 4608, 1, 0, 4624, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582416934476990_927_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582416934476990_927_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..16587060 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582416934476990_927_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,185 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 6) || 
(WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 2448, 40962, 0, 2448, 40962, 0, 2448, 40962, 0, 2464, 40962, 0, 2464, 40962, 0, 2464, 40962, 0, 2480, 40962, 0, 2480, 40962, 0, 2480, 40962, 0, 3280, 32768, 0, 3296, 32768, 0, 3312, 32768, 0, 4288, 1, 0, 8784, 1024, 0, 8800, 1024, 0, 8816, 1024, 0, 9088, 256, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 2448, 40962, 0, 2448, 40962, 0, 2448, 40962, 0, 2464, 40962, 0, 2464, 40962, 0, 2464, 40962, 0, 2480, 40962, 0, 2480, 40962, 0, 2480, 40962, 0, 3280, 32768, 0, 3296, 32768, 0, 3312, 32768, 0, 4288, 1, 0, 
8784, 1024, 0, 8800, 1024, 0, 8816, 1024, 0, 9088, 256, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582417520161541_928_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582417520161541_928_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f89302b5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582417520161541_928_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,82 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 5))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if 
(((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582417622486018_929_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582417622486018_929_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..17278a1b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582417622486018_929_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582417743922504_930_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582417743922504_930_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..86f127f8 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582417743922504_930_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,136 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 0))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((129 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + if ((i2 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2576, 4, 0, 6080, 43690, 0, 6080, 43690, 0, 6080, 43690, 0, 6080, 43690, 0, 6080, 43690, 0, 6080, 43690, 0, 6080, 43690, 0, 6080, 43690, 0, 7296, 8208, 0, 7296, 8208, 0, 7312, 8208, 0, 7312, 8208, 0, 7328, 8208, 0, 7328, 8208, 0, 2576, 4, 0, 6080, 43690, 0, 6080, 43690, 0, 6080, 43690, 0, 6080, 43690, 0, 6080, 43690, 0, 6080, 43690, 0, 6080, 43690, 0, 6080, 43690, 0, 7296, 8208, 0, 7296, 8208, 0, 7312, 8208, 0, 7312, 8208, 0, 7328, 8208, 0, 7328, 8208, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582419745299342_931_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582419745299342_931_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..28f728d1 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582419745299342_931_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,443 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() 
== 14)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || 
(WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((299 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((318 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((333 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((340 << 6) | (i0 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (359 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((385 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((403 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((413 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((422 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((counter3 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((438 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (443 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (450 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 65, 0, 1792, 65, 0, 7552, 2, 0, 28048, 2, 0, 28064, 2, 0, 28352, 17476, 0, 28352, 17476, 0, 28352, 17476, 0, 28352, 17476, 0, 28800, 34952, 0, 28800, 34952, 0, 28800, 34952, 0, 28800, 34952, 0, 1792, 65, 0, 1792, 65, 0, 7552, 2, 0, 28048, 2, 0, 28064, 2, 0, 28352, 17476, 0, 28352, 17476, 0, 28352, 17476, 0, 28352, 
17476, 0, 28800, 34952, 0, 28800, 34952, 0, 28800, 34952, 0, 28800, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582421569118003_932_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582421569118003_932_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0bb2774e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582421569118003_932_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3776, 5185, 0, 3776, 5185, 0, 3776, 5185, 0, 3776, 5185, 0, 3392, 43690, 0, 3392, 43690, 0, 3392, 43690, 0, 3392, 43690, 0, 3392, 43690, 0, 3392, 43690, 0, 3392, 43690, 0, 3392, 43690, 0, 2624, 16384, 0, 3776, 5185, 0, 3776, 5185, 0, 3776, 5185, 0, 3776, 5185, 0, 3392, 43690, 0, 3392, 43690, 0, 3392, 43690, 0, 3392, 43690, 0, 3392, 43690, 0, 3392, 43690, 0, 3392, 43690, 0, 3392, 43690, 0, 2624, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582440348705431_935_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582440348705431_935_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8ebc4636 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582440348705431_935_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,188 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 5952, 1040, 0, 5952, 1040, 0, 6592, 18724, 0, 6592, 18724, 0, 6592, 18724, 0, 6592, 18724, 0, 6592, 18724, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 5952, 1040, 0, 5952, 1040, 0, 6592, 18724, 0, 6592, 18724, 0, 6592, 18724, 0, 6592, 18724, 0, 6592, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582440535425928_936_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582440535425928_936_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..94f08fc9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582440535425928_936_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,238 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((166 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 486 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1856, 32771, 0, 1856, 32771, 0, 1856, 32771, 0, 3088, 4097, 0, 3088, 4097, 0, 3104, 4097, 0, 3104, 4097, 0, 3664, 4097, 0, 3664, 4097, 0, 3680, 4097, 0, 3680, 4097, 0, 4224, 16, 0, 7616, 45083, 0, 7616, 45083, 0, 7616, 45083, 0, 7616, 45083, 0, 7616, 45083, 0, 7616, 45083, 0, 7616, 45083, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 8576, 85, 0, 8576, 85, 0, 8576, 85, 0, 8576, 85, 0, 9488, 40960, 0, 9488, 40960, 0, 9504, 40960, 0, 9504, 40960, 0, 9520, 40960, 0, 9520, 40960, 0, 10644, 43018, 0, 10644, 43018, 0, 10644, 43018, 0, 10644, 43018, 0, 10644, 43018, 0, 10648, 43018, 0, 10648, 43018, 0, 10648, 43018, 0, 10648, 43018, 0, 10648, 43018, 0, 10660, 43018, 0, 10660, 43018, 0, 10660, 43018, 0, 10660, 43018, 0, 10660, 43018, 0, 10664, 43018, 0, 10664, 43018, 0, 10664, 43018, 0, 10664, 43018, 0, 10664, 43018, 0, 10676, 43018, 0, 10676, 43018, 0, 10676, 43018, 0, 10676, 43018, 0, 10676, 43018, 0, 10680, 43018, 0, 10680, 43018, 0, 10680, 43018, 0, 10680, 43018, 0, 10680, 43018, 0, 11088, 32768, 0, 11104, 32768, 0, 11120, 32768, 0, 576, 73, 0, 
576, 73, 0, 576, 73, 0, 1856, 32771, 0, 1856, 32771, 0, 1856, 32771, 0, 3088, 4097, 0, 3088, 4097, 0, 3104, 4097, 0, 3104, 4097, 0, 3664, 4097, 0, 3664, 4097, 0, 3680, 4097, 0, 3680, 4097, 0, 4224, 16, 0, 7616, 45083, 0, 7616, 45083, 0, 7616, 45083, 0, 7616, 45083, 0, 7616, 45083, 0, 7616, 45083, 0, 7616, 45083, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 7936, 65535, 0, 8576, 85, 0, 8576, 85, 0, 8576, 85, 0, 8576, 85, 0, 9488, 40960, 0, 9488, 40960, 0, 9504, 40960, 0, 9504, 40960, 0, 9520, 40960, 0, 9520, 40960, 0, 10644, 43018, 0, 10644, 43018, 0, 10644, 43018, 0, 10644, 43018, 0, 10644, 43018, 0, 10648, 43018, 0, 10648, 43018, 0, 10648, 43018, 0, 10648, 43018, 0, 10648, 43018, 0, 10660, 43018, 0, 10660, 43018, 0, 10660, 43018, 0, 10660, 43018, 0, 10660, 43018, 0, 10664, 43018, 0, 10664, 43018, 0, 10664, 43018, 0, 10664, 43018, 0, 10664, 43018, 0, 10676, 43018, 0, 10676, 43018, 0, 10676, 43018, 0, 10676, 43018, 0, 10676, 43018, 0, 10680, 43018, 0, 10680, 43018, 0, 10680, 43018, 0, 10680, 43018, 0, 10680, 43018, 0, 11088, 32768, 0, 11104, 32768, 0, 11120, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582445179267615_938_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582445179267615_938_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..16dcafa4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582445179267615_938_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,234 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + 
Data: [1216, 1, 0, 3520, 256, 0, 4160, 64, 0, 4736, 1040, 0, 4736, 1040, 0, 5056, 16388, 0, 5056, 16388, 0, 1216, 1, 0, 3520, 256, 0, 4160, 64, 0, 4736, 1040, 0, 4736, 1040, 0, 5056, 16388, 0, 5056, 16388, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582445488651765_939_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582445488651765_939_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..781d76fc --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582445488651765_939_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,117 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 
threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5312, 8192, 0, 5328, 8192, 0, 5312, 8192, 0, 5328, 8192, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582445734839581_940_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582445734839581_940_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87cd25f4 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582445734839581_940_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582445854827856_941_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582445854827856_941_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1b99adbf --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582445854827856_941_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,212 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 8))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { 
+ if ((WaveGetLaneIndex() >= 14)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 
32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 13712, 32, 0, 13728, 32, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 832, 21845, 0, 13712, 32, 0, 13728, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582469439559277_943_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582469439559277_943_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..17278a1b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582469439559277_943_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582513984553754_948_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582513984553754_948_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e6dd49ef --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582513984553754_948_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,412 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 
1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((128 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((139 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((160 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((175 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((186 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((237 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((257 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((268 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if 
((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((275 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 1)) { + continue; + } + if ((i4 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((292 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if 
((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((342 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((353 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((370 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((384 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((395 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (400 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (404 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 876 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2256, 2080, 0, 2256, 2080, 0, 2272, 2080, 0, 2272, 2080, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 4752, 43690, 0, 4752, 43690, 0, 4752, 43690, 0, 4752, 43690, 0, 4752, 43690, 0, 4752, 43690, 0, 4752, 43690, 0, 4752, 43690, 0, 4768, 43690, 0, 4768, 43690, 0, 4768, 43690, 0, 4768, 43690, 0, 4768, 43690, 0, 4768, 43690, 0, 4768, 43690, 0, 4768, 43690, 0, 5456, 32768, 0, 5472, 32768, 0, 6080, 73, 0, 6080, 73, 0, 6080, 73, 0, 11216, 8192, 0, 11220, 8192, 0, 11224, 8192, 0, 11232, 8192, 0, 11236, 8192, 0, 11240, 8192, 0, 12496, 8192, 0, 12512, 8192, 0, 12800, 18724, 0, 12800, 18724, 0, 12800, 18724, 0, 12800, 18724, 0, 12800, 18724, 0, 14208, 36873, 0, 14208, 36873, 0, 14208, 36873, 0, 14208, 36873, 0, 14224, 36873, 0, 14224, 36873, 0, 14224, 36873, 0, 14224, 36873, 0, 14240, 36873, 0, 14240, 36873, 0, 14240, 36873, 0, 14240, 36873, 0, 15168, 32768, 0, 15172, 32768, 0, 15176, 32768, 0, 15184, 32768, 0, 15188, 32768, 0, 15192, 32768, 0, 15200, 32768, 0, 15204, 32768, 0, 15208, 32768, 0, 16448, 32769, 0, 
16448, 32769, 0, 16452, 32769, 0, 16452, 32769, 0, 16456, 32769, 0, 16456, 32769, 0, 16464, 32769, 0, 16464, 32769, 0, 16468, 32769, 0, 16468, 32769, 0, 16472, 32769, 0, 16472, 32769, 0, 16480, 32769, 0, 16480, 32769, 0, 16484, 32769, 0, 16484, 32769, 0, 16488, 32769, 0, 16488, 32769, 0, 17152, 36865, 0, 17152, 36865, 0, 17152, 36865, 0, 17156, 36865, 0, 17156, 36865, 0, 17156, 36865, 0, 17160, 36865, 0, 17160, 36865, 0, 17160, 36865, 0, 17168, 36865, 0, 17168, 36865, 0, 17168, 36865, 0, 17172, 36865, 0, 17172, 36865, 0, 17172, 36865, 0, 17176, 36865, 0, 17176, 36865, 0, 17176, 36865, 0, 17184, 36865, 0, 17184, 36865, 0, 17184, 36865, 0, 17188, 36865, 0, 17188, 36865, 0, 17188, 36865, 0, 17192, 36865, 0, 17192, 36865, 0, 17192, 36865, 0, 18688, 36865, 0, 18688, 36865, 0, 18688, 36865, 0, 18704, 36865, 0, 18704, 36865, 0, 18704, 36865, 0, 18720, 36865, 0, 18720, 36865, 0, 18720, 36865, 0, 21888, 8192, 0, 21892, 8192, 0, 21896, 8192, 0, 21904, 8192, 0, 21908, 8192, 0, 21912, 8192, 0, 21920, 8192, 0, 21924, 8192, 0, 21928, 8192, 0, 22592, 8192, 0, 22608, 8192, 0, 22624, 8192, 0, 25600, 18724, 0, 25600, 18724, 0, 25600, 18724, 0, 25600, 18724, 0, 25600, 18724, 0, 2256, 2080, 0, 2256, 2080, 0, 2272, 2080, 0, 2272, 2080, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3152, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 3168, 43690, 0, 4752, 43690, 0, 4752, 43690, 0, 4752, 43690, 0, 4752, 43690, 0, 4752, 43690, 0, 4752, 43690, 0, 4752, 43690, 0, 4752, 43690, 0, 4768, 43690, 0, 4768, 43690, 0, 4768, 43690, 0, 4768, 43690, 0, 4768, 43690, 0, 4768, 43690, 0, 4768, 43690, 0, 4768, 43690, 0, 5456, 32768, 0, 5472, 32768, 0, 6080, 73, 0, 6080, 73, 0, 6080, 73, 0, 11216, 8192, 0, 11220, 8192, 0, 11224, 8192, 0, 11232, 8192, 0, 11236, 8192, 0, 11240, 8192, 0, 12496, 8192, 0, 12512, 8192, 0, 12800, 18724, 0, 12800, 18724, 0, 12800, 18724, 0, 
12800, 18724, 0, 12800, 18724, 0, 14208, 36873, 0, 14208, 36873, 0, 14208, 36873, 0, 14208, 36873, 0, 14224, 36873, 0, 14224, 36873, 0, 14224, 36873, 0, 14224, 36873, 0, 14240, 36873, 0, 14240, 36873, 0, 14240, 36873, 0, 14240, 36873, 0, 15168, 32768, 0, 15172, 32768, 0, 15176, 32768, 0, 15184, 32768, 0, 15188, 32768, 0, 15192, 32768, 0, 15200, 32768, 0, 15204, 32768, 0, 15208, 32768, 0, 16448, 32769, 0, 16448, 32769, 0, 16452, 32769, 0, 16452, 32769, 0, 16456, 32769, 0, 16456, 32769, 0, 16464, 32769, 0, 16464, 32769, 0, 16468, 32769, 0, 16468, 32769, 0, 16472, 32769, 0, 16472, 32769, 0, 16480, 32769, 0, 16480, 32769, 0, 16484, 32769, 0, 16484, 32769, 0, 16488, 32769, 0, 16488, 32769, 0, 17152, 36865, 0, 17152, 36865, 0, 17152, 36865, 0, 17156, 36865, 0, 17156, 36865, 0, 17156, 36865, 0, 17160, 36865, 0, 17160, 36865, 0, 17160, 36865, 0, 17168, 36865, 0, 17168, 36865, 0, 17168, 36865, 0, 17172, 36865, 0, 17172, 36865, 0, 17172, 36865, 0, 17176, 36865, 0, 17176, 36865, 0, 17176, 36865, 0, 17184, 36865, 0, 17184, 36865, 0, 17184, 36865, 0, 17188, 36865, 0, 17188, 36865, 0, 17188, 36865, 0, 17192, 36865, 0, 17192, 36865, 0, 17192, 36865, 0, 18688, 36865, 0, 18688, 36865, 0, 18688, 36865, 0, 18704, 36865, 0, 18704, 36865, 0, 18704, 36865, 0, 18720, 36865, 0, 18720, 36865, 0, 18720, 36865, 0, 21888, 8192, 0, 21892, 8192, 0, 21896, 8192, 0, 21904, 8192, 0, 21908, 8192, 0, 21912, 8192, 0, 21920, 8192, 0, 21924, 8192, 0, 21928, 8192, 0, 22592, 8192, 0, 22608, 8192, 0, 22624, 8192, 0, 25600, 18724, 0, 25600, 18724, 0, 25600, 18724, 0, 25600, 18724, 0, 25600, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582695748469729_949_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582695748469729_949_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ff36ffbd --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582695748469729_949_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,308 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + 
result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = 
(counter0 + 1); + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((106 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((116 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((125 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((130 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((137 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4992, 73, 0, 4992, 73, 0, 4992, 73, 0, 5904, 128, 0, 5920, 128, 0, 6804, 16, 0, 6808, 16, 0, 6820, 16, 0, 6824, 16, 0, 7444, 16, 0, 7448, 16, 0, 7460, 16, 0, 7464, 16, 0, 8340, 1024, 0, 8344, 1024, 0, 8356, 1024, 0, 8360, 1024, 0, 8788, 128, 0, 8792, 128, 0, 8804, 128, 0, 8808, 128, 0, 14272, 16676, 0, 14272, 16676, 0, 14272, 16676, 0, 14272, 16676, 0, 4992, 73, 0, 4992, 73, 0, 4992, 73, 0, 5904, 128, 0, 5920, 128, 0, 6804, 16, 0, 6808, 16, 0, 6820, 16, 0, 6824, 16, 0, 7444, 16, 0, 7448, 16, 0, 7460, 16, 0, 7464, 16, 0, 8340, 1024, 0, 8344, 1024, 0, 8356, 1024, 0, 8360, 1024, 0, 8788, 128, 0, 8792, 128, 0, 8804, 128, 0, 8808, 128, 0, 14272, 16676, 0, 14272, 16676, 0, 14272, 16676, 0, 14272, 16676, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582697582710321_950_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582697582710321_950_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b7d4da9a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582697582710321_950_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,266 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i1 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 15)) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 2624, 32768, 0, 2640, 32768, 0, 3072, 32768, 0, 3088, 32768, 0, 3648, 1024, 0, 3968, 18436, 0, 3968, 18436, 0, 3968, 18436, 0, 13376, 256, 0, 13392, 256, 0, 13408, 256, 0, 576, 17, 0, 576, 17, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 1472, 17476, 0, 2624, 32768, 0, 2640, 32768, 0, 3072, 32768, 0, 3088, 32768, 0, 3648, 1024, 0, 3968, 
18436, 0, 3968, 18436, 0, 3968, 18436, 0, 13376, 256, 0, 13392, 256, 0, 13408, 256, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582698818278114_951_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582698818278114_951_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..54440360 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582698818278114_951_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,171 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 2304, 3, 0, 2304, 3, 0, 3344, 1, 0, 3360, 1, 0, 3376, 1, 0, 3920, 1, 0, 3936, 1, 0, 3952, 1, 0, 4608, 1088, 0, 4608, 1088, 0, 6144, 512, 0, 6160, 512, 0, 6176, 512, 0, 7296, 64, 0, 7744, 34952, 0, 7744, 34952, 0, 7744, 34952, 0, 7744, 34952, 0, 576, 17, 0, 576, 17, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 1152, 4369, 0, 2304, 3, 0, 2304, 3, 0, 3344, 1, 0, 3360, 1, 0, 3376, 1, 0, 3920, 1, 0, 3936, 1, 0, 3952, 1, 0, 4608, 1088, 0, 4608, 1088, 0, 6144, 512, 0, 6160, 512, 0, 6176, 512, 0, 7296, 64, 0, 7744, 34952, 0, 7744, 34952, 0, 7744, 34952, 0, 7744, 34952, 0] + - 
Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582699952488256_952_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582699952488256_952_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f88cf63b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582699952488256_952_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,346 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((96 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) 
| (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 8))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((353 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((372 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (381 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: 
{ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (385 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (404 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (408 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 6864, 2, 0, 6880, 2, 0, 6896, 2, 0, 12736, 17476, 0, 12736, 17476, 0, 12736, 17476, 0, 12736, 17476, 0, 25280, 85, 0, 25280, 85, 0, 25280, 85, 0, 25280, 85, 0, 576, 17, 0, 576, 17, 0, 6864, 2, 0, 6880, 2, 0, 6896, 2, 0, 12736, 17476, 0, 12736, 17476, 0, 12736, 17476, 0, 12736, 17476, 0, 25280, 85, 0, 25280, 85, 0, 25280, 85, 0, 25280, 85, 0] + - Name: _wave_op_index + Format: 
UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582702777060645_953_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582702777060645_953_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2b227be8 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582702777060645_953_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3008, 64512, 0, 3008, 64512, 0, 3008, 64512, 0, 3008, 64512, 0, 3008, 64512, 0, 3008, 64512, 0, 2752, 768, 0, 2752, 768, 0, 2368, 85, 0, 2368, 85, 0, 2368, 85, 0, 2368, 85, 0, 1984, 34, 0, 1984, 34, 0, 3008, 64512, 0, 3008, 64512, 0, 3008, 64512, 0, 3008, 64512, 0, 3008, 64512, 0, 3008, 64512, 0, 2752, 768, 0, 2752, 768, 0, 2368, 85, 0, 2368, 85, 0, 2368, 85, 0, 2368, 85, 0, 1984, 34, 0, 1984, 34, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582702921956398_954_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582702921956398_954_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fe22e959 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582702921956398_954_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,129 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1616, 40960, 0, 1616, 40960, 0, 4816, 43010, 0, 4816, 43010, 0, 4816, 43010, 0, 4816, 43010, 0, 5264, 43008, 0, 5264, 43008, 0, 5264, 43008, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1616, 40960, 0, 1616, 40960, 0, 4816, 43010, 0, 4816, 43010, 0, 4816, 43010, 0, 4816, 43010, 0, 5264, 43008, 0, 5264, 43008, 0, 5264, 43008, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + 
Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582703157041950_955_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582703157041950_955_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fd21a559 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582703157041950_955_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,124 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + 
result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 22562, 0, 1856, 22562, 0, 1856, 22562, 0, 1856, 22562, 0, 1856, 22562, 0, 1600, 1, 0, 2496, 73, 0, 2496, 73, 0, 2496, 73, 0, 3072, 1040, 0, 3072, 1040, 0, 3392, 18724, 0, 3392, 18724, 0, 3392, 18724, 0, 3392, 18724, 0, 3392, 18724, 0, 1856, 22562, 0, 1856, 22562, 0, 1856, 22562, 0, 1856, 22562, 0, 1856, 22562, 0, 1600, 1, 0, 2496, 73, 0, 2496, 73, 0, 2496, 73, 0, 3072, 1040, 0, 3072, 1040, 0, 3392, 18724, 0, 3392, 18724, 0, 3392, 18724, 0, 3392, 18724, 0, 3392, 18724, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582703330579489_956_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582703330579489_956_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..44e21213 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582703330579489_956_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,274 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 4))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((214 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((247 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2752, 33288, 0, 2752, 33288, 0, 2752, 33288, 0, 3904, 36865, 0, 3904, 36865, 0, 3904, 36865, 0, 5440, 1040, 0, 5440, 1040, 0, 6080, 28086, 0, 6080, 28086, 0, 6080, 28086, 0, 6080, 28086, 0, 6080, 28086, 0, 6080, 28086, 0, 6080, 28086, 0, 6080, 28086, 0, 6080, 28086, 0, 6080, 28086, 0, 6720, 73, 0, 6720, 73, 0, 6720, 73, 0, 8192, 1024, 0, 8208, 1024, 0, 15808, 16384, 0, 15824, 16384, 0, 2752, 33288, 0, 2752, 33288, 0, 2752, 33288, 0, 3904, 36865, 0, 3904, 36865, 0, 3904, 36865, 0, 5440, 1040, 0, 5440, 1040, 0, 6080, 28086, 0, 6080, 28086, 0, 6080, 28086, 0, 6080, 28086, 0, 6080, 28086, 0, 6080, 28086, 0, 6080, 28086, 0, 6080, 28086, 0, 6080, 28086, 0, 6080, 28086, 0, 6720, 73, 0, 6720, 73, 0, 6720, 73, 0, 8192, 1024, 0, 8208, 1024, 0, 15808, 16384, 0, 15824, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582714482527942_958_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582714482527942_958_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..98c5f553 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582714482527942_958_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,111 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((21 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 522 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1360, 1, 0, 1364, 1, 0, 1376, 1, 0, 1380, 1, 0, 1392, 1, 0, 1396, 1, 0, 3600, 20805, 0, 3600, 20805, 0, 3600, 20805, 0, 3600, 20805, 0, 3600, 20805, 0, 3600, 20805, 0, 3604, 20805, 0, 3604, 20805, 0, 3604, 20805, 0, 3604, 20805, 0, 3604, 20805, 0, 3604, 20805, 0, 3616, 20805, 0, 3616, 20805, 0, 3616, 20805, 0, 3616, 20805, 0, 3616, 20805, 0, 3616, 20805, 0, 3620, 20805, 0, 3620, 20805, 0, 3620, 20805, 0, 3620, 20805, 0, 3620, 20805, 0, 3620, 20805, 0, 3632, 20805, 0, 3632, 20805, 0, 3632, 20805, 0, 3632, 20805, 0, 3632, 20805, 0, 3632, 20805, 0, 3636, 20805, 0, 3636, 20805, 0, 3636, 20805, 0, 3636, 20805, 0, 3636, 20805, 0, 3636, 20805, 0, 4048, 65024, 0, 4048, 65024, 0, 4048, 65024, 0, 4048, 65024, 0, 4048, 65024, 0, 4048, 65024, 0, 4048, 65024, 0, 4052, 65024, 0, 4052, 65024, 0, 4052, 65024, 0, 4052, 65024, 0, 4052, 65024, 0, 4052, 65024, 0, 4052, 65024, 0, 4064, 65024, 0, 4064, 65024, 
0, 4064, 65024, 0, 4064, 65024, 0, 4064, 65024, 0, 4064, 65024, 0, 4064, 65024, 0, 4068, 65024, 0, 4068, 65024, 0, 4068, 65024, 0, 4068, 65024, 0, 4068, 65024, 0, 4068, 65024, 0, 4068, 65024, 0, 4080, 65024, 0, 4080, 65024, 0, 4080, 65024, 0, 4080, 65024, 0, 4080, 65024, 0, 4080, 65024, 0, 4080, 65024, 0, 4084, 65024, 0, 4084, 65024, 0, 4084, 65024, 0, 4084, 65024, 0, 4084, 65024, 0, 4084, 65024, 0, 4084, 65024, 0, 4496, 512, 0, 4512, 512, 0, 4528, 512, 0, 1360, 1, 0, 1364, 1, 0, 1376, 1, 0, 1380, 1, 0, 1392, 1, 0, 1396, 1, 0, 3600, 20805, 0, 3600, 20805, 0, 3600, 20805, 0, 3600, 20805, 0, 3600, 20805, 0, 3600, 20805, 0, 3604, 20805, 0, 3604, 20805, 0, 3604, 20805, 0, 3604, 20805, 0, 3604, 20805, 0, 3604, 20805, 0, 3616, 20805, 0, 3616, 20805, 0, 3616, 20805, 0, 3616, 20805, 0, 3616, 20805, 0, 3616, 20805, 0, 3620, 20805, 0, 3620, 20805, 0, 3620, 20805, 0, 3620, 20805, 0, 3620, 20805, 0, 3620, 20805, 0, 3632, 20805, 0, 3632, 20805, 0, 3632, 20805, 0, 3632, 20805, 0, 3632, 20805, 0, 3632, 20805, 0, 3636, 20805, 0, 3636, 20805, 0, 3636, 20805, 0, 3636, 20805, 0, 3636, 20805, 0, 3636, 20805, 0, 4048, 65024, 0, 4048, 65024, 0, 4048, 65024, 0, 4048, 65024, 0, 4048, 65024, 0, 4048, 65024, 0, 4048, 65024, 0, 4052, 65024, 0, 4052, 65024, 0, 4052, 65024, 0, 4052, 65024, 0, 4052, 65024, 0, 4052, 65024, 0, 4052, 65024, 0, 4064, 65024, 0, 4064, 65024, 0, 4064, 65024, 0, 4064, 65024, 0, 4064, 65024, 0, 4064, 65024, 0, 4064, 65024, 0, 4068, 65024, 0, 4068, 65024, 0, 4068, 65024, 0, 4068, 65024, 0, 4068, 65024, 0, 4068, 65024, 0, 4068, 65024, 0, 4080, 65024, 0, 4080, 65024, 0, 4080, 65024, 0, 4080, 65024, 0, 4080, 65024, 0, 4080, 65024, 0, 4080, 65024, 0, 4084, 65024, 0, 4084, 65024, 0, 4084, 65024, 0, 4084, 65024, 0, 4084, 65024, 0, 4084, 65024, 0, 4084, 65024, 0, 4496, 512, 0, 4512, 512, 0, 4528, 512, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582932751348509_963_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582932751348509_963_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b1583b1e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582932751348509_963_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,186 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 12)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + 
WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 64, 0, 1472, 4129, 0, 1472, 4129, 0, 1472, 4129, 0, 2496, 73, 0, 2496, 73, 0, 2496, 73, 0, 3664, 8194, 0, 3664, 8194, 0, 3680, 8194, 0, 3680, 8194, 0, 6608, 128, 0, 6624, 128, 0, 7312, 9218, 0, 7312, 9218, 0, 7312, 9218, 0, 7328, 9218, 0, 7328, 9218, 0, 7328, 9218, 0, 8832, 18724, 0, 8832, 18724, 0, 8832, 18724, 0, 8832, 18724, 0, 8832, 18724, 0, 1856, 64, 0, 1472, 
4129, 0, 1472, 4129, 0, 1472, 4129, 0, 2496, 73, 0, 2496, 73, 0, 2496, 73, 0, 3664, 8194, 0, 3664, 8194, 0, 3680, 8194, 0, 3680, 8194, 0, 6608, 128, 0, 6624, 128, 0, 7312, 9218, 0, 7312, 9218, 0, 7312, 9218, 0, 7328, 9218, 0, 7328, 9218, 0, 7328, 9218, 0, 8832, 18724, 0, 8832, 18724, 0, 8832, 18724, 0, 8832, 18724, 0, 8832, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756582933302001865_964_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756582933302001865_964_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a6cc458d --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756582933302001865_964_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,262 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + 
case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 13))) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((117 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((126 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } else { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((151 << 6) | (counter0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((160 << 6) | (counter0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | 
(i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 414 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 7, 0, 576, 7, 0, 576, 7, 0, 3392, 1, 0, 4416, 3, 0, 4416, 3, 0, 5840, 16904, 0, 5840, 16904, 0, 5840, 16904, 0, 5856, 16904, 0, 5856, 16904, 0, 5856, 16904, 0, 5872, 16904, 0, 5872, 16904, 0, 5872, 16904, 0, 7504, 2, 0, 7508, 2, 0, 7520, 2, 0, 7524, 2, 0, 7536, 2, 0, 7540, 2, 0, 8080, 2, 0, 8084, 2, 0, 8096, 2, 0, 8100, 2, 0, 8112, 2, 0, 8116, 2, 0, 11088, 8, 0, 11104, 8, 0, 11120, 8, 0, 13248, 4096, 0, 13264, 4096, 0, 13280, 4096, 0, 14336, 4100, 0, 14336, 4100, 0, 14352, 4100, 0, 14352, 4100, 0, 14368, 4100, 0, 14368, 4100, 0, 16000, 4100, 0, 16000, 4100, 0, 16016, 4100, 0, 16016, 4100, 0, 16032, 4100, 0, 16032, 4100, 0, 16832, 17745, 0, 16832, 17745, 0, 16832, 17745, 0, 16832, 17745, 0, 16832, 17745, 0, 16832, 17745, 0, 16848, 17745, 0, 16848, 17745, 0, 16848, 17745, 0, 16848, 17745, 0, 16848, 17745, 0, 16848, 17745, 0, 16864, 17745, 0, 16864, 17745, 0, 16864, 17745, 0, 16864, 17745, 0, 16864, 17745, 0, 16864, 17745, 0, 16832, 4100, 0, 16832, 4100, 0, 16848, 4100, 0, 16848, 4100, 0, 16864, 4100, 0, 16864, 4100, 0, 576, 7, 0, 576, 7, 0, 576, 7, 0, 3392, 1, 0, 4416, 3, 0, 4416, 3, 0, 5840, 16904, 0, 5840, 16904, 0, 5840, 16904, 0, 5856, 16904, 0, 
5856, 16904, 0, 5856, 16904, 0, 5872, 16904, 0, 5872, 16904, 0, 5872, 16904, 0, 7504, 2, 0, 7508, 2, 0, 7520, 2, 0, 7524, 2, 0, 7536, 2, 0, 7540, 2, 0, 8080, 2, 0, 8084, 2, 0, 8096, 2, 0, 8100, 2, 0, 8112, 2, 0, 8116, 2, 0, 11088, 8, 0, 11104, 8, 0, 11120, 8, 0, 13248, 4096, 0, 13264, 4096, 0, 13280, 4096, 0, 14336, 4100, 0, 14336, 4100, 0, 14352, 4100, 0, 14352, 4100, 0, 14368, 4100, 0, 14368, 4100, 0, 16000, 4100, 0, 16000, 4100, 0, 16016, 4100, 0, 16016, 4100, 0, 16032, 4100, 0, 16032, 4100, 0, 16832, 17745, 0, 16832, 17745, 0, 16832, 17745, 0, 16832, 17745, 0, 16832, 17745, 0, 16832, 17745, 0, 16848, 17745, 0, 16848, 17745, 0, 16848, 17745, 0, 16848, 17745, 0, 16848, 17745, 0, 16848, 17745, 0, 16864, 17745, 0, 16864, 17745, 0, 16864, 17745, 0, 16864, 17745, 0, 16864, 17745, 0, 16864, 17745, 0, 16832, 4100, 0, 16832, 4100, 0, 16848, 4100, 0, 16848, 4100, 0, 16864, 4100, 0, 16864, 4100, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583005014122454_965_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583005014122454_965_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a7d5a2a0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583005014122454_965_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,120 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5520, 2048, 0, 5536, 2048, 0, 5552, 2048, 0, 6720, 512, 0, 7424, 61469, 0, 7424, 61469, 0, 7424, 61469, 0, 7424, 61469, 0, 7424, 61469, 0, 7424, 61469, 0, 7424, 61469, 0, 7424, 61469, 0, 5520, 2048, 0, 5536, 2048, 0, 5552, 2048, 0, 6720, 512, 0, 7424, 61469, 0, 7424, 61469, 0, 7424, 61469, 0, 7424, 61469, 0, 7424, 61469, 0, 7424, 61469, 0, 7424, 61469, 0, 7424, 
61469, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583005194145468_966_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583005194145468_966_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..597687a6 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583005194145468_966_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,134 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + 
result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3392, 40960, 0, 3392, 40960, 0, 5184, 32768, 0, 6784, 16, 0, 6784, 8196, 0, 6784, 8196, 0, 8128, 16642, 0, 8128, 16642, 0, 8128, 16642, 0, 3392, 40960, 0, 3392, 40960, 0, 5184, 32768, 0, 6784, 16, 0, 6784, 8196, 0, 6784, 8196, 0, 8128, 16642, 0, 8128, 16642, 0, 8128, 16642, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583014946972873_970_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583014946972873_970_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b38408a9 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583014946972873_970_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,170 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 3))) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5696, 8, 0, 6336, 85, 0, 6336, 85, 0, 6336, 85, 0, 6336, 85, 0, 10048, 4225, 0, 10048, 4225, 0, 10048, 4225, 0, 9792, 10258, 0, 9792, 10258, 0, 9792, 10258, 0, 9792, 10258, 0, 9536, 33320, 0, 9536, 33320, 0, 9536, 33320, 0, 9536, 33320, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5696, 8, 0, 6336, 85, 0, 6336, 85, 0, 6336, 85, 0, 6336, 85, 0, 10048, 4225, 0, 10048, 4225, 0, 10048, 4225, 0, 9792, 10258, 0, 9792, 10258, 0, 9792, 10258, 0, 9792, 10258, 0, 9536, 33320, 0, 9536, 33320, 0, 9536, 33320, 0, 9536, 33320, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583015244890775_971_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583015244890775_971_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b5f4fb49 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583015244890775_971_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,192 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + 
result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 
2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 174 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2432, 8, 0, 3328, 2080, 0, 3328, 2080, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6336, 43018, 0, 6336, 43018, 0, 6336, 43018, 0, 6336, 43018, 0, 6336, 43018, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 2432, 8, 0, 3328, 2080, 0, 3328, 2080, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6720, 21845, 0, 6336, 43018, 
0, 6336, 43018, 0, 6336, 43018, 0, 6336, 43018, 0, 6336, 43018, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583015593585692_972_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583015593585692_972_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..be201395 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583015593585692_972_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,297 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((100 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if ((WaveGetLaneIndex() >= 11)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + if 
(((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((244 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 3: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((276 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 10)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((286 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((301 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((316 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2048, 17, 0, 2048, 17, 0, 10688, 8738, 0, 10688, 8738, 0, 10688, 8738, 0, 10688, 8738, 0, 19268, 8, 0, 19272, 8, 0, 19276, 8, 0, 19284, 8, 0, 19288, 8, 0, 19292, 8, 0, 20224, 128, 0, 20240, 128, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2048, 17, 0, 
2048, 17, 0, 10688, 8738, 0, 10688, 8738, 0, 10688, 8738, 0, 10688, 8738, 0, 19268, 8, 0, 19272, 8, 0, 19276, 8, 0, 19284, 8, 0, 19288, 8, 0, 19292, 8, 0, 20224, 128, 0, 20240, 128, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583017219756919_973_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583017219756919_973_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..77df596e --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583017219756919_973_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,171 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 
5) || (WaveGetLaneIndex() == 9))) { + if ((WaveGetLaneIndex() >= 9)) { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 168 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 73, 0, 1792, 73, 0, 1792, 73, 0, 2368, 1040, 0, 2368, 1040, 0, 2688, 18724, 0, 2688, 18724, 0, 2688, 18724, 0, 2688, 18724, 0, 2688, 18724, 0, 3328, 85, 0, 3328, 85, 0, 3328, 85, 0, 3328, 85, 0, 4544, 43018, 0, 4544, 43018, 0, 4544, 43018, 0, 4544, 43018, 0, 4544, 43018, 0, 4560, 43018, 0, 4560, 43018, 0, 4560, 43018, 0, 4560, 43018, 0, 4560, 43018, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 73, 0, 1792, 73, 0, 1792, 73, 0, 2368, 1040, 0, 2368, 1040, 0, 2688, 18724, 0, 2688, 18724, 0, 2688, 18724, 0, 2688, 18724, 0, 2688, 18724, 0, 3328, 85, 0, 3328, 85, 0, 3328, 85, 0, 3328, 85, 0, 4544, 43018, 0, 4544, 43018, 0, 4544, 43018, 0, 4544, 43018, 0, 4544, 43018, 0, 4560, 43018, 0, 4560, 43018, 0, 4560, 43018, 0, 4560, 43018, 0, 4560, 43018, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583017832043941_974_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583017832043941_974_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a85bbef0 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583017832043941_974_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,385 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 14)) { + if 
((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 
11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 15)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = ((180 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 12352, 64, 0, 12928, 1088, 0, 12928, 1088, 0, 13248, 17476, 0, 13248, 17476, 0, 13248, 17476, 0, 13248, 17476, 0, 16000, 8, 0, 18112, 2048, 0, 576, 17, 0, 576, 17, 0, 12352, 64, 0, 12928, 1088, 0, 12928, 1088, 0, 13248, 17476, 0, 13248, 17476, 0, 13248, 17476, 0, 13248, 17476, 0, 16000, 8, 0, 18112, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583093144393096_978_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583093144393096_978_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..aad9b29c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583093144393096_978_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,168 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 14)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 564 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1984, 3, 0, 1984, 3, 0, 1728, 49152, 0, 1728, 49152, 0, 1472, 10920, 0, 1472, 10920, 0, 1472, 10920, 0, 1472, 10920, 0, 1472, 10920, 0, 1472, 10920, 0, 3392, 21845, 0, 3392, 21845, 0, 3392, 21845, 0, 3392, 21845, 0, 3392, 21845, 0, 3392, 21845, 0, 3392, 21845, 0, 3392, 21845, 0, 3408, 21845, 0, 3408, 21845, 0, 3408, 21845, 0, 3408, 21845, 0, 3408, 21845, 0, 3408, 21845, 0, 3408, 21845, 0, 3408, 21845, 0, 3424, 21845, 0, 3424, 21845, 0, 3424, 21845, 0, 3424, 21845, 0, 3424, 21845, 0, 3424, 21845, 0, 3424, 21845, 0, 3424, 21845, 0, 5056, 21845, 0, 5056, 21845, 0, 5056, 21845, 0, 5056, 21845, 0, 5056, 21845, 0, 5056, 21845, 0, 5056, 21845, 0, 5056, 21845, 0, 5072, 21845, 0, 5072, 21845, 0, 5072, 21845, 0, 5072, 21845, 0, 5072, 21845, 0, 5072, 21845, 0, 5072, 21845, 0, 5072, 21845, 0, 5088, 21845, 0, 5088, 21845, 0, 5088, 21845, 0, 5088, 21845, 0, 5088, 21845, 0, 5088, 21845, 0, 5088, 21845, 0, 5088, 21845, 0, 5760, 43018, 0, 5760, 43018, 0, 5760, 43018, 0, 5760, 43018, 0, 5760, 43018, 0, 5776, 43018, 0, 5776, 43018, 0, 5776, 43018, 0, 5776, 43018, 0, 5776, 43018, 0, 5792, 
43018, 0, 5792, 43018, 0, 5792, 43018, 0, 5792, 43018, 0, 5792, 43018, 0, 7424, 127, 0, 7424, 127, 0, 7424, 127, 0, 7424, 127, 0, 7424, 127, 0, 7424, 127, 0, 7424, 127, 0, 7440, 127, 0, 7440, 127, 0, 7440, 127, 0, 7440, 127, 0, 7440, 127, 0, 7440, 127, 0, 7440, 127, 0, 7456, 127, 0, 7456, 127, 0, 7456, 127, 0, 7456, 127, 0, 7456, 127, 0, 7456, 127, 0, 7456, 127, 0, 1984, 3, 0, 1984, 3, 0, 1728, 49152, 0, 1728, 49152, 0, 1472, 10920, 0, 1472, 10920, 0, 1472, 10920, 0, 1472, 10920, 0, 1472, 10920, 0, 1472, 10920, 0, 3392, 21845, 0, 3392, 21845, 0, 3392, 21845, 0, 3392, 21845, 0, 3392, 21845, 0, 3392, 21845, 0, 3392, 21845, 0, 3392, 21845, 0, 3408, 21845, 0, 3408, 21845, 0, 3408, 21845, 0, 3408, 21845, 0, 3408, 21845, 0, 3408, 21845, 0, 3408, 21845, 0, 3408, 21845, 0, 3424, 21845, 0, 3424, 21845, 0, 3424, 21845, 0, 3424, 21845, 0, 3424, 21845, 0, 3424, 21845, 0, 3424, 21845, 0, 3424, 21845, 0, 5056, 21845, 0, 5056, 21845, 0, 5056, 21845, 0, 5056, 21845, 0, 5056, 21845, 0, 5056, 21845, 0, 5056, 21845, 0, 5056, 21845, 0, 5072, 21845, 0, 5072, 21845, 0, 5072, 21845, 0, 5072, 21845, 0, 5072, 21845, 0, 5072, 21845, 0, 5072, 21845, 0, 5072, 21845, 0, 5088, 21845, 0, 5088, 21845, 0, 5088, 21845, 0, 5088, 21845, 0, 5088, 21845, 0, 5088, 21845, 0, 5088, 21845, 0, 5088, 21845, 0, 5760, 43018, 0, 5760, 43018, 0, 5760, 43018, 0, 5760, 43018, 0, 5760, 43018, 0, 5776, 43018, 0, 5776, 43018, 0, 5776, 43018, 0, 5776, 43018, 0, 5776, 43018, 0, 5792, 43018, 0, 5792, 43018, 0, 5792, 43018, 0, 5792, 43018, 0, 5792, 43018, 0, 7424, 127, 0, 7424, 127, 0, 7424, 127, 0, 7424, 127, 0, 7424, 127, 0, 7424, 127, 0, 7424, 127, 0, 7440, 127, 0, 7440, 127, 0, 7440, 127, 0, 7440, 127, 0, 7440, 127, 0, 7440, 127, 0, 7440, 127, 0, 7456, 127, 0, 7456, 127, 0, 7456, 127, 0, 7456, 127, 0, 7456, 127, 0, 7456, 127, 0, 7456, 127, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583097525563473_979_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583097525563473_979_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..17278a1b --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583097525563473_979_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583097665391860_980_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583097665391860_980_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2914e25a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583097665391860_980_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,193 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 7))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | 
(i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } + } + case 3: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 240 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3328, 16, 0, 3648, 256, 0, 5312, 17476, 0, 5312, 17476, 0, 5312, 17476, 0, 5312, 17476, 0, 5328, 17476, 0, 5328, 17476, 0, 5328, 17476, 0, 5328, 17476, 0, 5344, 17476, 0, 5344, 17476, 0, 5344, 17476, 0, 5344, 
17476, 0, 9152, 17476, 0, 9152, 17476, 0, 9152, 17476, 0, 9152, 17476, 0, 9168, 17476, 0, 9168, 17476, 0, 9168, 17476, 0, 9168, 17476, 0, 9184, 17476, 0, 9184, 17476, 0, 9184, 17476, 0, 9184, 17476, 0, 10560, 59406, 0, 10560, 59406, 0, 10560, 59406, 0, 10560, 59406, 0, 10560, 59406, 0, 10560, 59406, 0, 10560, 59406, 0, 10576, 59406, 0, 10576, 59406, 0, 10576, 59406, 0, 10576, 59406, 0, 10576, 59406, 0, 10576, 59406, 0, 10576, 59406, 0, 3328, 16, 0, 3648, 256, 0, 5312, 17476, 0, 5312, 17476, 0, 5312, 17476, 0, 5312, 17476, 0, 5328, 17476, 0, 5328, 17476, 0, 5328, 17476, 0, 5328, 17476, 0, 5344, 17476, 0, 5344, 17476, 0, 5344, 17476, 0, 5344, 17476, 0, 9152, 17476, 0, 9152, 17476, 0, 9152, 17476, 0, 9152, 17476, 0, 9168, 17476, 0, 9168, 17476, 0, 9168, 17476, 0, 9168, 17476, 0, 9184, 17476, 0, 9184, 17476, 0, 9184, 17476, 0, 9184, 17476, 0, 10560, 59406, 0, 10560, 59406, 0, 10560, 59406, 0, 10560, 59406, 0, 10560, 59406, 0, 10560, 59406, 0, 10560, 59406, 0, 10576, 59406, 0, 10576, 59406, 0, 10576, 59406, 0, 10576, 59406, 0, 10576, 59406, 0, 10576, 59406, 0, 10576, 59406, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583100323478486_981_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583100323478486_981_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..450032ae --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583100323478486_981_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,74 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 12)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 61440, 0, 896, 61440, 0, 896, 61440, 0, 896, 61440, 0, 912, 61440, 0, 912, 61440, 0, 912, 61440, 0, 912, 61440, 0, 928, 61440, 0, 928, 61440, 0, 928, 61440, 0, 928, 61440, 0, 896, 61440, 0, 896, 61440, 0, 896, 61440, 0, 896, 61440, 0, 912, 61440, 0, 912, 61440, 0, 912, 61440, 0, 912, 61440, 0, 928, 61440, 0, 928, 61440, 0, 928, 61440, 0, 928, 61440, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern 
+ GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583100661338089_982_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583100661338089_982_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..72406836 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583100661338089_982_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,255 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 12))) { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 14))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((171 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((188 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((199 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((216 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((268 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((277 << 6) | (i3 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 516 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5312, 32769, 0, 5312, 32769, 0, 6592, 32769, 0, 6592, 32769, 0, 7760, 32777, 0, 7760, 32777, 0, 7760, 32777, 0, 7776, 32777, 0, 7776, 32777, 0, 7776, 32777, 0, 7792, 32777, 0, 7792, 32777, 0, 7792, 32777, 0, 8320, 1040, 0, 8320, 1040, 0, 8640, 18724, 0, 8640, 18724, 0, 8640, 18724, 0, 8640, 18724, 0, 8640, 18724, 0, 12740, 1, 0, 12744, 1, 0, 12756, 1, 0, 12760, 1, 0, 14272, 1, 0, 14288, 1, 0, 16000, 43690, 0, 16000, 43690, 0, 16000, 43690, 0, 16000, 43690, 0, 16000, 43690, 0, 16000, 43690, 0, 16000, 43690, 0, 16000, 43690, 0, 16016, 43690, 0, 16016, 43690, 0, 16016, 43690, 0, 16016, 43690, 0, 16016, 43690, 0, 16016, 43690, 0, 16016, 43690, 0, 16016, 43690, 0, 16032, 43690, 0, 16032, 43690, 0, 16032, 43690, 0, 16032, 43690, 0, 16032, 43690, 0, 16032, 43690, 0, 16032, 43690, 0, 16032, 43690, 0, 17152, 57345, 0, 17152, 57345, 0, 17152, 57345, 0, 17152, 57345, 0, 17168, 57345, 0, 17168, 57345, 0, 17168, 57345, 0, 17168, 57345, 0, 17184, 57345, 0, 17184, 57345, 0, 17184, 57345, 0, 17184, 57345, 0, 17728, 21845, 0, 17728, 21845, 0, 17728, 21845, 0, 17728, 21845, 0, 17728, 21845, 0, 17728, 21845, 0, 17728, 21845, 0, 17728, 21845, 0, 17744, 21845, 0, 17744, 21845, 0, 17744, 21845, 0, 17744, 21845, 0, 17744, 21845, 0, 17744, 21845, 0, 17744, 21845, 0, 17744, 21845, 0, 17760, 21845, 0, 17760, 21845, 0, 17760, 21845, 0, 17760, 21845, 0, 17760, 21845, 0, 17760, 21845, 0, 17760, 21845, 0, 17760, 21845, 0, 5312, 32769, 0, 5312, 32769, 0, 6592, 32769, 0, 6592, 32769, 0, 7760, 32777, 0, 7760, 32777, 0, 7760, 32777, 0, 7776, 32777, 0, 
7776, 32777, 0, 7776, 32777, 0, 7792, 32777, 0, 7792, 32777, 0, 7792, 32777, 0, 8320, 1040, 0, 8320, 1040, 0, 8640, 18724, 0, 8640, 18724, 0, 8640, 18724, 0, 8640, 18724, 0, 8640, 18724, 0, 12740, 1, 0, 12744, 1, 0, 12756, 1, 0, 12760, 1, 0, 14272, 1, 0, 14288, 1, 0, 16000, 43690, 0, 16000, 43690, 0, 16000, 43690, 0, 16000, 43690, 0, 16000, 43690, 0, 16000, 43690, 0, 16000, 43690, 0, 16000, 43690, 0, 16016, 43690, 0, 16016, 43690, 0, 16016, 43690, 0, 16016, 43690, 0, 16016, 43690, 0, 16016, 43690, 0, 16016, 43690, 0, 16016, 43690, 0, 16032, 43690, 0, 16032, 43690, 0, 16032, 43690, 0, 16032, 43690, 0, 16032, 43690, 0, 16032, 43690, 0, 16032, 43690, 0, 16032, 43690, 0, 17152, 57345, 0, 17152, 57345, 0, 17152, 57345, 0, 17152, 57345, 0, 17168, 57345, 0, 17168, 57345, 0, 17168, 57345, 0, 17168, 57345, 0, 17184, 57345, 0, 17184, 57345, 0, 17184, 57345, 0, 17184, 57345, 0, 17728, 21845, 0, 17728, 21845, 0, 17728, 21845, 0, 17728, 21845, 0, 17728, 21845, 0, 17728, 21845, 0, 17728, 21845, 0, 17728, 21845, 0, 17744, 21845, 0, 17744, 21845, 0, 17744, 21845, 0, 17744, 21845, 0, 17744, 21845, 0, 17744, 21845, 0, 17744, 21845, 0, 17744, 21845, 0, 17760, 21845, 0, 17760, 21845, 0, 17760, 21845, 0, 17760, 21845, 0, 17760, 21845, 0, 17760, 21845, 0, 17760, 21845, 0, 17760, 21845, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583103701310784_983_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583103701310784_983_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..66cd240c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583103701310784_983_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,196 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Generated wave-participation probe. At every guarded site the executing lane runs a wave intrinsic, reserves three consecutive words with InterlockedAdd on _wave_op_index[0] (temp receives the value before the add), then stores a tag word (site constant << 6, ORed with the enclosing loop counters shifted by 4 and 2 where present) followed by ballot.x and ballot.y of WaveActiveBallot(1). result is only accumulated locally and never written to a buffer. */ + uint result = 0; + if ((WaveGetLaneIndex() >= 9)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 5))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); +
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] =
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((208 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size:
330 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3072, 16384, 0, 6592, 255, 0, 6592, 255, 0, 6592, 255, 0, 6592, 255, 0, 6592, 255, 0, 6592, 255, 0, 6592, 255, 0, 6592, 255, 0, 6208, 65280, 0, 6208, 65280, 0, 6208, 65280, 0, 6208, 65280, 0, 6208, 65280, 0, 6208, 65280, 0, 6208, 65280, 0, 6208, 65280, 0, 8768, 4616, 0, 8768, 4616, 0, 8768, 4616, 0, 8784, 4616, 0, 8784, 4616, 0, 8784, 4616, 0, 8800, 4616, 0, 8800, 4616, 0, 8800, 4616, 0, 9984, 4609, 0, 9984, 4609, 0, 9984, 4609, 0, 10000, 4609, 0, 10000, 4609, 0, 10000, 4609, 0, 10016, 4609, 0, 10016, 4609, 0, 10016, 4609, 0, 10560, 1040, 0, 10560, 1040, 0, 13328, 18436, 0, 13328, 18436, 0, 13328, 18436, 0, 13332, 18436, 0, 13332, 18436, 0, 13332, 18436, 0, 13344, 18436, 0, 13344, 18436, 0, 13344, 18436, 0, 13348, 18436, 0, 13348, 18436, 0, 13348, 18436, 0, 13360, 18436, 0, 13360, 18436, 0, 13360, 18436, 0, 13364, 18436, 0, 13364, 18436, 0, 13364, 18436, 0, 3072, 16384, 0, 6592, 255, 0, 6592, 255, 0, 6592, 255, 0, 6592, 255, 0, 6592, 255, 0, 6592, 255, 0, 6592, 255, 0, 6592, 255, 0, 6208, 65280, 0, 6208, 65280, 0, 6208, 65280, 0, 6208, 65280, 0, 6208, 65280, 0, 6208, 65280, 0, 6208, 65280, 0, 6208, 65280, 0, 8768, 4616, 0, 8768, 4616, 0, 8768, 4616, 0, 8784, 4616, 0, 8784, 4616, 0, 8784, 4616, 0, 8800, 4616, 0, 8800, 4616, 0, 8800, 4616, 0, 9984, 4609, 0, 9984, 4609, 0, 9984, 4609, 0, 10000, 4609, 0, 10000, 4609, 0, 10000, 4609, 0, 10016, 4609, 0, 10016, 4609, 0, 10016, 4609, 0, 10560, 1040, 0, 10560, 1040, 0, 13328, 18436, 0, 13328, 18436, 0, 13328, 18436, 0, 13332, 18436, 0, 13332, 18436, 0, 13332, 18436, 0, 13344, 18436, 0, 13344, 18436, 0, 13344, 18436, 0, 13348, 18436, 0, 13348, 18436, 0, 13348, 18436, 0, 13360, 18436, 0, 13360, 18436, 0, 13360, 18436, 0, 13364, 18436, 0, 13364, 18436, 0, 13364, 18436, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit 
+ Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583105391115320_984_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583105391115320_984_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9655569c --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583105391115320_984_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,326 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Generated wave-participation probe. At every guarded site the executing lane runs a wave intrinsic, reserves three consecutive words with InterlockedAdd on _wave_op_index[0] (temp receives the value before the add), then stores a tag word (site constant << 6, ORed with the enclosing loop counter shifted by 4 where present) followed by ballot.x and ballot.y of WaveActiveBallot(1). result is only accumulated locally and never written to a buffer. NOTE(review): cases 0, 1 and 2 of the switch on (lane % 4) end without break - presumably generator-intended fall-through; confirm the target compiler accepts it. */ + uint result = 0; + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; +
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] =
ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 15))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 9)) { +
result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 3: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; +
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((280 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (301 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); +
_participant_bit[temp] = (329 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [12800, 1024, 0, 13120, 4, 0, 14592, 1024, 0, 14608, 1024, 0, 19264, 81, 0, 19264, 81, 0, 19264, 81, 0, 21056, 16450, 0, 21056, 16450, 0, 21056, 16450, 0, 12800, 1024, 0, 13120, 4, 0, 14592, 1024, 0, 14608, 1024, 0, 19264, 81, 0, 19264, 81, 0, 19264, 81, 0, 21056, 16450, 0, 21056, 16450, 0, 21056, 16450, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583105794195489_985_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583105794195489_985_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f594f7dc --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583105794195489_985_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,126 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Generated wave-participation probe. At every guarded site the executing lane runs a wave intrinsic, reserves three consecutive words with InterlockedAdd on _wave_op_index[0] (temp receives the value before the add), then stores a tag word (site constant << 6, ORed with counter0 << 4 and counter1 << 2 where the site is inside those loops) followed by ballot.x and ballot.y of WaveActiveBallot(1). result is only accumulated locally and never written to a buffer. */ + uint result = 0; + if ((WaveGetLaneIndex() >= 9)) { + if ((WaveGetLaneIndex() >= 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 7) ||
(WaveGetLaneIndex() == 10))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 306 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 32768, 0, 2388, 65024, 0, 2388, 65024, 0, 2388, 65024, 0, 2388, 65024, 0, 2388, 65024, 0, 2388, 65024, 0, 2388, 65024, 0, 2392, 65024, 0, 2392, 65024, 0, 2392, 65024, 0, 2392, 65024, 0, 2392, 65024, 0, 2392, 65024, 0, 2392, 65024, 0, 2404, 65024, 0, 2404, 65024, 0, 2404, 65024, 0, 2404, 65024, 0, 2404, 65024, 0, 2404, 65024, 0, 2404, 65024, 0, 2408, 65024, 0, 2408, 65024, 0, 2408, 65024, 0, 2408, 65024, 0, 2408, 65024, 0, 2408, 65024, 0, 2408, 65024, 0,
4756, 63488, 0, 4756, 63488, 0, 4756, 63488, 0, 4756, 63488, 0, 4756, 63488, 0, 4760, 63488, 0, 4760, 63488, 0, 4760, 63488, 0, 4760, 63488, 0, 4760, 63488, 0, 4772, 63488, 0, 4772, 63488, 0, 4772, 63488, 0, 4772, 63488, 0, 4772, 63488, 0, 4776, 63488, 0, 4776, 63488, 0, 4776, 63488, 0, 4776, 63488, 0, 4776, 63488, 0, 5392, 2048, 0, 5408, 2048, 0, 576, 32768, 0, 2388, 65024, 0, 2388, 65024, 0, 2388, 65024, 0, 2388, 65024, 0, 2388, 65024, 0, 2388, 65024, 0, 2388, 65024, 0, 2392, 65024, 0, 2392, 65024, 0, 2392, 65024, 0, 2392, 65024, 0, 2392, 65024, 0, 2392, 65024, 0, 2392, 65024, 0, 2404, 65024, 0, 2404, 65024, 0, 2404, 65024, 0, 2404, 65024, 0, 2404, 65024, 0, 2404, 65024, 0, 2404, 65024, 0, 2408, 65024, 0, 2408, 65024, 0, 2408, 65024, 0, 2408, 65024, 0, 2408, 65024, 0, 2408, 65024, 0, 2408, 65024, 0, 4756, 63488, 0, 4756, 63488, 0, 4756, 63488, 0, 4756, 63488, 0, 4756, 63488, 0, 4760, 63488, 0, 4760, 63488, 0, 4760, 63488, 0, 4760, 63488, 0, 4760, 63488, 0, 4772, 63488, 0, 4772, 63488, 0, 4772, 63488, 0, 4772, 63488, 0, 4772, 63488, 0, 4776, 63488, 0, 4776, 63488, 0, 4776, 63488, 0, 4776, 63488, 0, 4776, 63488, 0, 5392, 2048, 0, 5408, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583208917911189_989_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583208917911189_989_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..84682481 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583208917911189_989_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,206 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Generated wave-participation probe. At every guarded site the executing lane runs a wave intrinsic, reserves three consecutive words with InterlockedAdd on _wave_op_index[0] (temp receives the value before the add), then stores a tag word (site constant << 6, ORed with the enclosing loop counter shifted by 4 where present) followed by ballot.x and ballot.y of WaveActiveBallot(1). result is only accumulated locally and never written to a buffer. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); +
_participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0;
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 8064, 17476, 0, 8064, 17476, 0, 8064, 17476, 0, 8064, 17476, 0, 9216, 32776, 0, 9216, 32776, 0, 1280, 1, 0, 8064, 17476, 0, 8064, 17476, 0, 8064, 17476, 0, 8064, 17476, 0, 9216, 32776, 0, 9216, 32776, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation
+ Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583209105497972_990_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583209105497972_990_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dac52edc --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583209105497972_990_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,222 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 14)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 324 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1408, 1, 0, 1856, 1, 0, 2432, 16, 0, 2752, 288, 0, 2752, 288, 0, 3920, 16385, 0, 3920, 16385, 0, 3936, 16385, 0, 3936, 16385, 0, 3952, 16385, 0, 3952, 16385, 0, 5520, 21845, 0, 5520, 21845, 0, 5520, 21845, 0, 5520, 21845, 0, 5520, 21845, 0, 5520, 21845, 0, 5520, 21845, 0, 5520, 21845, 0, 5536, 21845, 0, 5536, 21845, 0, 5536, 21845, 0, 5536, 21845, 0, 5536, 21845, 0, 5536, 21845, 0, 5536, 21845, 0, 5536, 21845, 0, 5552, 21845, 0, 5552, 21845, 0, 5552, 21845, 0, 5552, 21845, 0, 5552, 21845, 0, 5552, 21845, 0, 5552, 21845, 0, 5552, 21845, 0, 5968, 8192, 0, 5984, 8192, 0, 6000, 8192, 0, 6672, 16387, 0, 6672, 16387, 0, 6672, 16387, 0, 6688, 16387, 0, 6688, 16387, 0, 6688, 16387, 0, 6704, 16387, 0, 6704, 16387, 0, 6704, 16387, 0, 8832, 18724, 0, 8832, 18724, 0, 8832, 18724, 0, 8832, 18724, 0, 8832, 18724, 0, 576, 17, 0, 576, 17, 0, 1408, 1, 0, 1856, 1, 0, 2432, 16, 0, 2752, 288, 0, 2752, 288, 0, 3920, 16385, 0, 3920, 16385, 0, 3936, 16385, 0, 3936, 16385, 0, 3952, 16385, 0, 3952, 16385, 0, 5520, 21845, 0, 5520, 21845, 0, 5520, 21845, 0, 5520, 21845, 0, 5520, 21845, 0, 5520, 21845, 0, 5520, 21845, 0, 5520, 21845, 0, 5536, 21845, 0, 5536, 21845, 0, 5536, 21845, 0, 5536, 21845, 0, 5536, 21845, 0, 5536, 21845, 0, 5536, 21845, 0, 5536, 21845, 0, 5552, 21845, 0, 5552, 21845, 0, 5552, 21845, 0, 5552, 21845, 0, 5552, 21845, 0, 5552, 21845, 0, 5552, 21845, 0, 5552, 21845, 0, 5968, 8192, 0, 5984, 8192, 0, 6000, 8192, 0, 6672, 16387, 0, 6672, 16387, 0, 6672, 16387, 0, 6688, 16387, 0, 6688, 16387, 0, 6688, 16387, 0, 6704, 16387, 0, 6704, 16387, 0, 6704, 16387, 0, 8832, 18724, 0, 8832, 18724, 0, 8832, 18724, 0, 8832, 18724, 0, 
8832, 18724, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583219545306966_991_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583219545306966_991_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..de5f34ca --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583219545306966_991_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,206 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 14)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 
3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: expected_bit_patterns + Format: UInt32 + 
Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2304, 8200, 0, 2304, 8200, 0, 2320, 8200, 0, 2320, 8200, 0, 2336, 8200, 0, 2336, 8200, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 4096, 17, 0, 4096, 17, 0, 5824, 16384, 0, 7232, 16384, 0, 7248, 16384, 0, 7264, 16384, 0, 8320, 16384, 0, 8768, 34952, 0, 8768, 34952, 0, 8768, 34952, 0, 8768, 34952, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 2304, 8200, 0, 2304, 8200, 0, 2320, 8200, 0, 2320, 8200, 0, 2336, 8200, 0, 2336, 8200, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 3456, 65535, 0, 4096, 17, 0, 4096, 17, 0, 5824, 16384, 0, 7232, 16384, 0, 7248, 16384, 0, 7264, 16384, 0, 8320, 16384, 0, 8768, 34952, 0, 8768, 34952, 0, 8768, 34952, 0, 8768, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583220357886050_992_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583220357886050_992_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..650b61af --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583220357886050_992_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,140 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= ((56 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2704, 33280, 0, 2704, 33280, 0, 2720, 33280, 0, 2720, 33280, 0, 2736, 33280, 0, 2736, 33280, 0, 3904, 2080, 0, 3904, 2080, 0, 6016, 8, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2704, 33280, 0, 2704, 33280, 0, 2720, 33280, 0, 2720, 33280, 0, 2736, 33280, 0, 2736, 33280, 0, 3904, 2080, 0, 3904, 2080, 0, 6016, 8, 0] + - Name: _wave_op_index + Format: 
UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583220802758633_993_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583220802758633_993_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..93ed7070 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583220802758633_993_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,382 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 7))) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 2))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((333 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((340 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (345 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((360 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 1)) { + continue; + } + if ((i6 == 1)) { + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (375 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 498 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 4, 0, 1056, 4, 0, 
2964, 321, 0, 2964, 321, 0, 2964, 321, 0, 2968, 321, 0, 2968, 321, 0, 2968, 321, 0, 2972, 321, 0, 2972, 321, 0, 2972, 321, 0, 2980, 321, 0, 2980, 321, 0, 2980, 321, 0, 2984, 321, 0, 2984, 321, 0, 2984, 321, 0, 2988, 321, 0, 2988, 321, 0, 2988, 321, 0, 4436, 5120, 0, 4436, 5120, 0, 4440, 5120, 0, 4440, 5120, 0, 4444, 5120, 0, 4444, 5120, 0, 4452, 5120, 0, 4452, 5120, 0, 4456, 5120, 0, 4456, 5120, 0, 4460, 5120, 0, 4460, 5120, 0, 8576, 8, 0, 8592, 8, 0, 9408, 17, 0, 9408, 17, 0, 9984, 4369, 0, 9984, 4369, 0, 9984, 4369, 0, 9984, 4369, 0, 10304, 4369, 0, 10304, 4369, 0, 10304, 4369, 0, 10304, 4369, 0, 14720, 68, 0, 14720, 68, 0, 14736, 68, 0, 14736, 68, 0, 15296, 68, 0, 15296, 68, 0, 15312, 68, 0, 15312, 68, 0, 15744, 34952, 0, 15744, 34952, 0, 15744, 34952, 0, 15744, 34952, 0, 17664, 4, 0, 18240, 43170, 0, 18240, 43170, 0, 18240, 43170, 0, 18240, 43170, 0, 18240, 43170, 0, 18240, 43170, 0, 19584, 1, 0, 20416, 1, 0, 21776, 34, 0, 21776, 34, 0, 21792, 34, 0, 21792, 34, 0, 22080, 17472, 0, 22080, 17472, 0, 22080, 17472, 0, 23040, 34816, 0, 23040, 34816, 0, 23056, 34816, 0, 23056, 34816, 0, 24000, 21841, 0, 24000, 21841, 0, 24000, 21841, 0, 24000, 21841, 0, 24000, 21841, 0, 24000, 21841, 0, 24000, 21841, 0, 1040, 4, 0, 1056, 4, 0, 2964, 321, 0, 2964, 321, 0, 2964, 321, 0, 2968, 321, 0, 2968, 321, 0, 2968, 321, 0, 2972, 321, 0, 2972, 321, 0, 2972, 321, 0, 2980, 321, 0, 2980, 321, 0, 2980, 321, 0, 2984, 321, 0, 2984, 321, 0, 2984, 321, 0, 2988, 321, 0, 2988, 321, 0, 2988, 321, 0, 4436, 5120, 0, 4436, 5120, 0, 4440, 5120, 0, 4440, 5120, 0, 4444, 5120, 0, 4444, 5120, 0, 4452, 5120, 0, 4452, 5120, 0, 4456, 5120, 0, 4456, 5120, 0, 4460, 5120, 0, 4460, 5120, 0, 8576, 8, 0, 8592, 8, 0, 9408, 17, 0, 9408, 17, 0, 9984, 4369, 0, 9984, 4369, 0, 9984, 4369, 0, 9984, 4369, 0, 10304, 4369, 0, 10304, 4369, 0, 10304, 4369, 0, 10304, 4369, 0, 14720, 68, 0, 14720, 68, 0, 14736, 68, 0, 14736, 68, 0, 15296, 68, 0, 15296, 68, 0, 15312, 68, 0, 15312, 68, 0, 15744, 34952, 0, 15744, 34952, 0, 
15744, 34952, 0, 15744, 34952, 0, 17664, 4, 0, 18240, 43170, 0, 18240, 43170, 0, 18240, 43170, 0, 18240, 43170, 0, 18240, 43170, 0, 18240, 43170, 0, 19584, 1, 0, 20416, 1, 0, 21776, 34, 0, 21776, 34, 0, 21792, 34, 0, 21792, 34, 0, 22080, 17472, 0, 22080, 17472, 0, 22080, 17472, 0, 23040, 34816, 0, 23040, 34816, 0, 23056, 34816, 0, 23056, 34816, 0, 24000, 21841, 0, 24000, 21841, 0, 24000, 21841, 0, 24000, 21841, 0, 24000, 21841, 0, 24000, 21841, 0, 24000, 21841, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583238288528309_994_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583238288528309_994_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6bff3f08 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583238288528309_994_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,477 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 13)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 10))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 9))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((267 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((290 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 2)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((324 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((340 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((349 
<< 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((356 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((373 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((391 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((400 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((409 << 6) | (counter4 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15))) { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((447 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (464 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4800, 1, 0, 5376, 16, 0, 5696, 256, 0, 6400, 4096, 0, 6720, 17476, 0, 6720, 17476, 0, 6720, 17476, 0, 6720, 17476, 0, 9984, 8, 0, 10624, 17, 0, 10624, 17, 0, 13264, 32, 0, 13280, 32, 0, 18560, 17476, 0, 18560, 17476, 0, 18560, 17476, 0, 18560, 17476, 0, 18576, 17476, 0, 18576, 17476, 0, 18576, 17476, 0, 18576, 17476, 0, 18592, 17476, 0, 18592, 17476, 0, 18592, 17476, 0, 18592, 17476, 0, 19200, 34952, 0, 19200, 34952, 0, 19200, 34952, 0, 19200, 34952, 0, 25616, 1024, 0, 29696, 64, 0, 576, 17, 0, 
576, 17, 0, 4800, 1, 0, 5376, 16, 0, 5696, 256, 0, 6400, 4096, 0, 6720, 17476, 0, 6720, 17476, 0, 6720, 17476, 0, 6720, 17476, 0, 9984, 8, 0, 10624, 17, 0, 10624, 17, 0, 13264, 32, 0, 13280, 32, 0, 18560, 17476, 0, 18560, 17476, 0, 18560, 17476, 0, 18560, 17476, 0, 18576, 17476, 0, 18576, 17476, 0, 18576, 17476, 0, 18576, 17476, 0, 18592, 17476, 0, 18592, 17476, 0, 18592, 17476, 0, 18592, 17476, 0, 19200, 34952, 0, 19200, 34952, 0, 19200, 34952, 0, 19200, 34952, 0, 25616, 1024, 0, 29696, 64, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583240332497390_995_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583240332497390_995_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e113b5de --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583240332497390_995_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,207 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 2))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 5)) || 
(WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 10))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 
10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 4)) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 4032, 36, 0, 4032, 36, 0, 5456, 32, 0, 5472, 32, 0, 5488, 32, 0, 10960, 8232, 0, 10960, 8232, 0, 10960, 8232, 0, 10976, 8232, 0, 10976, 8232, 0, 10976, 8232, 0, 10992, 8232, 0, 10992, 8232, 0, 10992, 8232, 0, 12416, 8224, 0, 12416, 8224, 0, 14976, 21845, 0, 14976, 21845, 0, 14976, 21845, 0, 14976, 21845, 0, 14976, 21845, 0, 14976, 21845, 0, 14976, 21845, 0, 14976, 21845, 0, 14592, 2, 0, 14080, 32768, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1040, 0, 1152, 1040, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 1472, 18724, 0, 4032, 36, 0, 4032, 36, 0, 5456, 32, 0, 5472, 32, 0, 5488, 32, 0, 10960, 8232, 0, 10960, 8232, 0, 10960, 8232, 0, 10976, 8232, 0, 10976, 8232, 0, 10976, 8232, 0, 10992, 8232, 0, 10992, 8232, 0, 10992, 8232, 0, 12416, 8224, 0, 12416, 8224, 0, 14976, 21845, 0, 14976, 21845, 0, 14976, 21845, 0, 14976, 21845, 0, 14976, 21845, 0, 14976, 21845, 0, 14976, 21845, 0, 14976, 21845, 0, 14592, 2, 0, 14080, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583241231456354_996_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583241231456354_996_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0d1068f6 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583241231456354_996_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 
+ - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 16, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 1216, 16, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0, 832, 43690, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583241375613665_997_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583241375613665_997_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1b7bfcf5 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583241375613665_997_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,308 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 13))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 6))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 6)) || 
(WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 2176, 43690, 0, 2176, 43690, 0, 2176, 43690, 0, 2176, 43690, 
0, 2176, 43690, 0, 2176, 43690, 0, 2176, 43690, 0, 2176, 43690, 0, 5184, 34952, 0, 5184, 34952, 0, 5184, 34952, 0, 5184, 34952, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 8640, 32768, 0, 9280, 1, 0, 10624, 32768, 0, 12544, 24582, 0, 12544, 24582, 0, 12544, 24582, 0, 12544, 24582, 0, 12544, 32769, 0, 12544, 32769, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 1152, 5201, 0, 2176, 43690, 0, 2176, 43690, 0, 2176, 43690, 0, 2176, 43690, 0, 2176, 43690, 0, 2176, 43690, 0, 2176, 43690, 0, 2176, 43690, 0, 5184, 34952, 0, 5184, 34952, 0, 5184, 34952, 0, 5184, 34952, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 5760, 43690, 0, 8640, 32768, 0, 9280, 1, 0, 10624, 32768, 0, 12544, 24582, 0, 12544, 24582, 0, 12544, 24582, 0, 12544, 24582, 0, 12544, 32769, 0, 12544, 32769, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583242493997494_998_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583242493997494_998_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e3d81a10 --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583242493997494_998_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,214 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 5)) || (WaveGetLaneIndex() == 12))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
} else { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 6))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 5)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 15))) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6032, 512, 0, 6048, 512, 0, 7040, 1040, 0, 7040, 1040, 0, 6032, 512, 0, 6048, 512, 0, 7040, 1040, 0, 7040, 1040, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize16BitTracking/tests/program_1756583257752885558_1000_increment_0_WaveParticipantBitTracking.test b/test/WaveSize16BitTracking/tests/program_1756583257752885558_1000_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e64b0f2a --- /dev/null +++ b/test/WaveSize16BitTracking/tests/program_1756583257752885558_1000_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,333 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(32, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() >= 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 8)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 9)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 11))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 6)) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 7)) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((186 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 12))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 14))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 32 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4160, 1, 0, 9168, 16448, 0, 9168, 16448, 0, 12736, 34952, 0, 12736, 34952, 0, 12736, 34952, 0, 12736, 34952, 0, 13376, 85, 0, 13376, 85, 0, 13376, 85, 0, 13376, 85, 0, 14592, 17, 0, 14592, 17, 0, 15488, 17476, 0, 15488, 17476, 0, 15488, 17476, 0, 15488, 17476, 0, 16832, 32768, 0, 17728, 2048, 0, 4160, 1, 0, 9168, 16448, 0, 9168, 16448, 0, 12736, 34952, 0, 12736, 34952, 0, 12736, 34952, 0, 12736, 34952, 0, 13376, 85, 0, 13376, 85, 0, 13376, 85, 0, 13376, 85, 0, 14592, 17, 0, 14592, 17, 0, 15488, 17476, 0, 15488, 17476, 0, 15488, 17476, 0, 15488, 17476, 0, 16832, 32768, 0, 17728, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756364070158858555_1_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756364070158858555_1_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4c58384b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756364070158858555_1_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,268 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((26 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 25)) { + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1200 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4112, 4456448, 0, 4112, 4456448, 0, 4128, 4456448, 0, 4128, 4456448, 0, 7248, 33685568, 0, 7248, 33685568, 0, 7248, 33685568, 0, 7264, 33685568, 0, 
7264, 33685568, 0, 7264, 33685568, 0, 7696, 16384, 0, 7712, 16384, 0, 8912, 64, 0, 8928, 64, 0, 9728, 1207959552, 0, 9728, 1207959552, 0, 10304, 4195328, 0, 10304, 4195328, 0, 10624, 612517924, 0, 10624, 612517924, 0, 10624, 612517924, 0, 10624, 612517924, 0, 10624, 612517924, 0, 10624, 612517924, 0, 10624, 612517924, 0, 10624, 612517924, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 
12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 13248, 17, 0, 13248, 17, 0, 13264, 17, 0, 13264, 17, 0, 13280, 17, 0, 13280, 17, 0, 13824, 286331153, 0, 13824, 286331153, 0, 13824, 286331153, 0, 13824, 286331153, 0, 13824, 286331153, 0, 13824, 286331153, 0, 13824, 286331153, 0, 13824, 286331153, 0, 13840, 286331153, 0, 13840, 286331153, 0, 13840, 286331153, 0, 13840, 286331153, 0, 13840, 286331153, 0, 13840, 286331153, 0, 13840, 286331153, 0, 13840, 286331153, 0, 13856, 286331153, 0, 13856, 286331153, 0, 13856, 286331153, 0, 13856, 286331153, 0, 13856, 286331153, 0, 13856, 286331153, 0, 13856, 286331153, 0, 13856, 286331153, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 
14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 4112, 4456448, 0, 4112, 4456448, 0, 4128, 4456448, 0, 4128, 4456448, 0, 7248, 33685568, 0, 7248, 33685568, 0, 7248, 33685568, 0, 7264, 33685568, 0, 7264, 33685568, 0, 7264, 33685568, 0, 7696, 16384, 0, 7712, 16384, 0, 8912, 64, 0, 8928, 64, 0, 9728, 1207959552, 0, 9728, 1207959552, 0, 10304, 4195328, 0, 10304, 4195328, 0, 10624, 612517924, 0, 10624, 612517924, 0, 10624, 612517924, 0, 10624, 612517924, 0, 10624, 612517924, 0, 10624, 612517924, 0, 10624, 612517924, 0, 10624, 612517924, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11712, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 1431655765, 0, 12608, 
1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12624, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 12640, 1431655765, 0, 13248, 17, 0, 13248, 17, 0, 13264, 17, 0, 13264, 17, 0, 13280, 17, 0, 13280, 17, 0, 13824, 286331153, 0, 13824, 286331153, 0, 13824, 286331153, 0, 13824, 286331153, 0, 13824, 286331153, 0, 13824, 286331153, 0, 13824, 286331153, 0, 13824, 286331153, 0, 13840, 286331153, 0, 13840, 286331153, 0, 13840, 286331153, 0, 13840, 286331153, 0, 13840, 286331153, 0, 13840, 286331153, 0, 13840, 286331153, 0, 13840, 286331153, 0, 13856, 286331153, 0, 13856, 286331153, 0, 13856, 286331153, 0, 13856, 286331153, 0, 13856, 286331153, 0, 13856, 286331153, 0, 13856, 286331153, 0, 13856, 286331153, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14144, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 1431655765, 0, 14160, 
1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0, 14176, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756364130263378791_2_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756364130263378791_2_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a4550680 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756364130263378791_2_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,180 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 10)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + 
Format: UInt32 + Stride: 4 + Data: [6080, 1024, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 8512, 2852126762, 0, 8512, 2852126762, 0, 8512, 2852126762, 0, 8512, 2852126762, 0, 8512, 2852126762, 0, 8512, 2852126762, 0, 8512, 2852126762, 0, 6080, 1024, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 9024, 1431655765, 0, 8512, 2852126762, 0, 8512, 2852126762, 0, 8512, 2852126762, 0, 8512, 2852126762, 0, 8512, 2852126762, 0, 8512, 2852126762, 0, 8512, 2852126762, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756364350988132124_5_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756364350988132124_5_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0d360a06 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756364350988132124_5_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 
1216, 4294705152, 0, 832, 3, 0, 832, 3, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 1216, 4294705152, 0, 832, 3, 0, 832, 3, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756364490308865239_9_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756364490308865239_9_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..15b61cec --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756364490308865239_9_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 5) || 
(WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2048, 67108864, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2048, 67108864, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756364490517084198_10_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756364490517084198_10_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..61c1115f --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756364490517084198_10_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,111 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 6) || 
(WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 2640, 33554432, 0, 2656, 33554432, 0, 3792, 2684354560, 0, 3792, 2684354560, 0, 3808, 2684354560, 0, 3808, 2684354560, 0, 4496, 2684354560, 0, 4496, 2684354560, 0, 4512, 2684354560, 0, 4512, 2684354560, 0, 5712, 134219776, 0, 5712, 134219776, 0, 5728, 
134219776, 0, 5728, 134219776, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 2640, 33554432, 0, 2656, 33554432, 0, 3792, 2684354560, 0, 3792, 2684354560, 0, 3808, 2684354560, 0, 3808, 2684354560, 0, 4496, 2684354560, 0, 4496, 2684354560, 0, 4512, 2684354560, 0, 4512, 2684354560, 0, 5712, 134219776, 0, 5712, 134219776, 0, 5728, 134219776, 0, 5728, 134219776, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756364549318902619_12_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756364549318902619_12_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c393c838 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756364549318902619_12_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,170 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 10)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2688, 4096, 0, 3904, 1226833993, 0, 3904, 1226833993, 0, 3904, 1226833993, 0, 3904, 1226833993, 0, 3904, 1226833993, 0, 3904, 1226833993, 0, 3904, 1226833993, 0, 3920, 1226833993, 0, 3920, 1226833993, 0, 3920, 1226833993, 0, 3920, 1226833993, 0, 3920, 1226833993, 0, 3920, 1226833993, 0, 3920, 1226833993, 0, 4864, 272696336, 0, 4864, 272696336, 0, 4864, 272696336, 0, 4864, 272696336, 0, 4864, 272696336, 0, 5184, 613566756, 0, 5184, 613566756, 0, 5184, 613566756, 0, 5184, 613566756, 0, 5184, 613566756, 0, 5184, 613566756, 0, 5184, 613566756, 0, 5184, 613566756, 0, 5184, 613566756, 0, 5184, 613566756, 0, 2688, 4096, 0, 3904, 1226833993, 0, 3904, 1226833993, 0, 3904, 1226833993, 0, 3904, 1226833993, 0, 3904, 1226833993, 0, 3904, 1226833993, 0, 3904, 1226833993, 0, 3920, 1226833993, 0, 3920, 1226833993, 0, 3920, 1226833993, 0, 3920, 1226833993, 0, 3920, 1226833993, 0, 3920, 1226833993, 0, 3920, 1226833993, 0, 4864, 272696336, 0, 4864, 272696336, 0, 4864, 272696336, 0, 4864, 272696336, 0, 4864, 272696336, 0, 5184, 613566756, 0, 5184, 613566756, 0, 5184, 613566756, 0, 5184, 613566756, 0, 5184, 613566756, 0, 5184, 613566756, 0, 5184, 613566756, 0, 5184, 613566756, 0, 5184, 613566756, 0, 5184, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756364550283621975_13_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756364550283621975_13_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8c5c94b7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756364550283621975_13_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,136 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + 
case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 282 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2048, 1, 0, 2064, 1, 0, 2080, 1, 0, 2624, 1090785345, 0, 2624, 1090785345, 0, 2624, 1090785345, 0, 2624, 1090785345, 0, 2624, 1090785345, 0, 2624, 1090785345, 0, 2640, 1090785345, 0, 2640, 1090785345, 0, 2640, 1090785345, 0, 2640, 1090785345, 0, 2640, 1090785345, 0, 2640, 1090785345, 0, 2656, 1090785345, 0, 2656, 1090785345, 0, 2656, 1090785345, 0, 2656, 1090785345, 0, 2656, 1090785345, 0, 2656, 1090785345, 0, 3392, 272696336, 0, 3392, 272696336, 0, 3392, 272696336, 0, 3392, 272696336, 0, 3392, 272696336, 0, 3712, 68174084, 0, 3712, 68174084, 0, 3712, 68174084, 0, 3712, 68174084, 0, 3712, 68174084, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 
1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 2048, 1, 0, 2064, 1, 0, 2080, 1, 0, 2624, 1090785345, 0, 2624, 1090785345, 0, 2624, 1090785345, 0, 2624, 1090785345, 0, 2624, 1090785345, 0, 2624, 1090785345, 0, 2640, 1090785345, 0, 2640, 1090785345, 0, 2640, 1090785345, 0, 2640, 1090785345, 0, 2640, 1090785345, 0, 2640, 1090785345, 0, 2656, 1090785345, 0, 2656, 1090785345, 0, 2656, 1090785345, 0, 2656, 1090785345, 0, 2656, 1090785345, 0, 2656, 1090785345, 0, 3392, 272696336, 0, 3392, 272696336, 0, 3392, 272696336, 0, 3392, 272696336, 0, 3392, 272696336, 0, 3712, 68174084, 0, 3712, 68174084, 0, 3712, 68174084, 0, 3712, 68174084, 0, 3712, 68174084, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0, 4288, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756364551249235452_14_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756364551249235452_14_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ef3cdcb5 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756364551249235452_14_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,301 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 28)) { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 23))) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((161 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29))) { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 26))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (286 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 2192, 1, 0, 2208, 1, 0, 2224, 1, 0, 3968, 1, 0, 5584, 32, 0, 5600, 32, 0, 6736, 1073741830, 0, 6736, 1073741830, 0, 6736, 1073741830, 0, 6752, 1073741830, 0, 6752, 1073741830, 0, 6752, 1073741830, 0, 7440, 1073741826, 0, 7440, 1073741826, 0, 7456, 1073741826, 0, 7456, 1073741826, 0, 10324, 67108864, 0, 10328, 67108864, 0, 10340, 67108864, 0, 10344, 67108864, 0, 11584, 2147483648, 0, 
17600, 134217728, 0, 18304, 134217728, 0, 20608, 8390656, 0, 20608, 8390656, 0, 1280, 1, 0, 2192, 1, 0, 2208, 1, 0, 2224, 1, 0, 3968, 1, 0, 5584, 32, 0, 5600, 32, 0, 6736, 1073741830, 0, 6736, 1073741830, 0, 6736, 1073741830, 0, 6752, 1073741830, 0, 6752, 1073741830, 0, 6752, 1073741830, 0, 7440, 1073741826, 0, 7440, 1073741826, 0, 7456, 1073741826, 0, 7456, 1073741826, 0, 10324, 67108864, 0, 10328, 67108864, 0, 10340, 67108864, 0, 10344, 67108864, 0, 11584, 2147483648, 0, 17600, 134217728, 0, 18304, 134217728, 0, 20608, 8390656, 0, 20608, 8390656, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756364566952065479_16_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756364566952065479_16_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e7fb7535 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756364566952065479_16_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,174 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 8)) { + result = (result + 
WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 13)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 240 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3520, 16908292, 0, 3520, 16908292, 0, 3520, 16908292, 0, 3136, 8187, 0, 3136, 8187, 0, 3136, 8187, 0, 3136, 8187, 0, 3136, 8187, 0, 3136, 8187, 0, 3136, 8187, 0, 3136, 8187, 0, 3136, 8187, 0, 3136, 8187, 0, 3136, 8187, 0, 3136, 8187, 0, 2752, 67641344, 0, 2752, 67641344, 0, 2752, 67641344, 0, 7168, 1073741829, 0, 7168, 1073741829, 0, 7168, 1073741829, 0, 7872, 1073741829, 0, 7872, 1073741829, 0, 7872, 1073741829, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 3520, 16908292, 0, 3520, 16908292, 0, 3520, 16908292, 0, 3136, 8187, 0, 3136, 8187, 0, 3136, 8187, 0, 3136, 8187, 0, 3136, 8187, 0, 3136, 
8187, 0, 3136, 8187, 0, 3136, 8187, 0, 3136, 8187, 0, 3136, 8187, 0, 3136, 8187, 0, 3136, 8187, 0, 2752, 67641344, 0, 2752, 67641344, 0, 2752, 67641344, 0, 7168, 1073741829, 0, 7168, 1073741829, 0, 7168, 1073741829, 0, 7872, 1073741829, 0, 7872, 1073741829, 0, 7872, 1073741829, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0, 8448, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756364568142272300_17_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756364568142272300_17_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a2418698 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756364568142272300_17_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,210 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 29))) { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 26)) { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((112 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 
+ - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6288, 512, 0, 6304, 512, 0, 6320, 512, 0, 8400, 512, 0, 8416, 512, 0, 8432, 512, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 10688, 2147483648, 0, 11648, 2048, 0, 11664, 2048, 0, 11680, 2048, 0, 6288, 512, 0, 6304, 512, 0, 6320, 512, 0, 8400, 512, 0, 8416, 512, 0, 8432, 512, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 8704, 1717986918, 0, 10688, 2147483648, 0, 11648, 2048, 0, 11664, 2048, 0, 11680, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756364580685371276_18_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756364580685371276_18_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9db0147f --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756364580685371276_18_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,246 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) 
{ + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 7616, 8, 0, 8512, 536870944, 0, 8512, 536870944, 0, 9472, 128, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 1088, 3758097407, 0, 7616, 8, 0, 8512, 536870944, 0, 8512, 536870944, 0, 9472, 128, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0, 10432, 3758097407, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756364629643353107_20_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756364629643353107_20_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..17208fc4 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756364629643353107_20_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,410 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 11))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 31))) { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 
28))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 16)) { + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 21))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 25)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (325 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (336 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 29))) { + 
result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (347 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (361 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (372 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (379 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (386 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (391 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + case 
2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (396 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (400 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2384, 1, 0, 2400, 1, 0, 2416, 1, 0, 6656, 268435584, 0, 6656, 268435584, 0, 17984, 73, 0, 17984, 73, 0, 17984, 73, 0, 19200, 272696336, 0, 19200, 272696336, 0, 19200, 272696336, 0, 19200, 272696336, 0, 19200, 272696336, 0, 25344, 613566756, 0, 25344, 613566756, 0, 25344, 613566756, 0, 25344, 613566756, 0, 25344, 613566756, 0, 25344, 613566756, 0, 25344, 613566756, 0, 25344, 613566756, 0, 25344, 613566756, 0, 25344, 613566756, 0, 2384, 1, 0, 2400, 1, 0, 2416, 1, 0, 6656, 268435584, 0, 6656, 268435584, 0, 17984, 73, 0, 17984, 73, 0, 17984, 73, 0, 19200, 272696336, 0, 19200, 272696336, 0, 19200, 272696336, 0, 19200, 272696336, 0, 19200, 272696336, 0, 25344, 613566756, 0, 25344, 613566756, 0, 25344, 613566756, 0, 25344, 613566756, 0, 25344, 613566756, 0, 25344, 613566756, 0, 25344, 613566756, 0, 25344, 613566756, 0, 25344, 613566756, 0, 25344, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - 
Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756364737669086444_22_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756364737669086444_22_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6b84c5e9 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756364737669086444_22_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,166 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 30)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if ((i0 == 1)) { + break; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 300 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4864, 85, 0, 4864, 85, 0, 4864, 85, 0, 4864, 85, 0, 5776, 2147483648, 0, 5792, 2147483648, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 8528, 2862612480, 0, 8528, 2862612480, 0, 8528, 2862612480, 0, 8528, 2862612480, 0, 8528, 2862612480, 0, 8528, 2862612480, 0, 8544, 2862612480, 0, 8544, 2862612480, 0, 8544, 2862612480, 0, 8544, 2862612480, 0, 8544, 2862612480, 0, 8544, 2862612480, 0, 4864, 85, 0, 
4864, 85, 0, 4864, 85, 0, 4864, 85, 0, 5776, 2147483648, 0, 5792, 2147483648, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6800, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 6816, 2863311530, 0, 8528, 2862612480, 0, 8528, 2862612480, 0, 8528, 2862612480, 0, 8528, 2862612480, 0, 8528, 2862612480, 0, 8528, 2862612480, 0, 8544, 2862612480, 0, 8544, 2862612480, 0, 8544, 2862612480, 0, 8544, 2862612480, 0, 8544, 2862612480, 0, 8544, 2862612480, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756364740085407034_23_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756364740085407034_23_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6cf4ea28 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756364740085407034_23_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,347 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) 
| (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 16)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = 
(result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 26))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 18)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 648 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 512, 0, 1872, 
512, 0, 1888, 512, 0, 3072, 1073741825, 0, 3072, 1073741825, 0, 3076, 1073741825, 0, 3076, 1073741825, 0, 3080, 1073741825, 0, 3080, 1073741825, 0, 3088, 1073741825, 0, 3088, 1073741825, 0, 3092, 1073741825, 0, 3092, 1073741825, 0, 3096, 1073741825, 0, 3096, 1073741825, 0, 3104, 1073741825, 0, 3104, 1073741825, 0, 3108, 1073741825, 0, 3108, 1073741825, 0, 3112, 1073741825, 0, 3112, 1073741825, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 8128, 17, 0, 8128, 17, 0, 8704, 286331153, 0, 8704, 286331153, 0, 8704, 286331153, 0, 8704, 286331153, 0, 8704, 286331153, 0, 8704, 286331153, 0, 8704, 286331153, 0, 8704, 286331153, 0, 9920, 1140850692, 0, 9920, 1140850692, 0, 9920, 1140850692, 0, 9936, 1140850692, 0, 9936, 1140850692, 0, 9936, 1140850692, 0, 10368, 838860, 0, 10368, 838860, 0, 10368, 838860, 0, 10368, 838860, 0, 10368, 838860, 0, 10368, 838860, 0, 10368, 838860, 0, 10368, 838860, 0, 10368, 838860, 0, 10368, 838860, 0, 11008, 17, 0, 11008, 17, 0, 12048, 572662306, 0, 12048, 572662306, 0, 12048, 572662306, 0, 12048, 572662306, 0, 12048, 572662306, 0, 12048, 572662306, 0, 12048, 572662306, 0, 12048, 572662306, 0, 12064, 572662306, 0, 12064, 572662306, 0, 12064, 572662306, 0, 12064, 572662306, 0, 12064, 572662306, 0, 12064, 572662306, 0, 12064, 572662306, 0, 12064, 572662306, 0, 12624, 572662306, 0, 12624, 572662306, 0, 12624, 572662306, 0, 12624, 572662306, 0, 12624, 572662306, 0, 12624, 572662306, 0, 12624, 572662306, 0, 12624, 572662306, 0, 12640, 572662306, 0, 12640, 572662306, 0, 12640, 572662306, 0, 12640, 572662306, 0, 12640, 572662306, 0, 12640, 572662306, 0, 12640, 572662306, 0, 12640, 572662306, 0, 14464, 4, 0, 16720, 4473920, 0, 16720, 4473920, 0, 16720, 4473920, 0, 16720, 4473920, 0, 16720, 4473920, 0, 16736, 4473920, 0, 16736, 4473920, 0, 16736, 4473920, 0, 16736, 4473920, 0, 16736, 4473920, 0, 
17472, 4194304, 0, 17920, 559240, 0, 17920, 559240, 0, 17920, 559240, 0, 17920, 559240, 0, 17920, 559240, 0, 1856, 512, 0, 1872, 512, 0, 1888, 512, 0, 3072, 1073741825, 0, 3072, 1073741825, 0, 3076, 1073741825, 0, 3076, 1073741825, 0, 3080, 1073741825, 0, 3080, 1073741825, 0, 3088, 1073741825, 0, 3088, 1073741825, 0, 3092, 1073741825, 0, 3092, 1073741825, 0, 3096, 1073741825, 0, 3096, 1073741825, 0, 3104, 1073741825, 0, 3104, 1073741825, 0, 3108, 1073741825, 0, 3108, 1073741825, 0, 3112, 1073741825, 0, 3112, 1073741825, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 8128, 17, 0, 8128, 17, 0, 8704, 286331153, 0, 8704, 286331153, 0, 8704, 286331153, 0, 8704, 286331153, 0, 8704, 286331153, 0, 8704, 286331153, 0, 8704, 286331153, 0, 8704, 286331153, 0, 9920, 1140850692, 0, 9920, 1140850692, 0, 9920, 1140850692, 0, 9936, 1140850692, 0, 9936, 1140850692, 0, 9936, 1140850692, 0, 10368, 838860, 0, 10368, 838860, 0, 10368, 838860, 0, 10368, 838860, 0, 10368, 838860, 0, 10368, 838860, 0, 10368, 838860, 0, 10368, 838860, 0, 10368, 838860, 0, 10368, 838860, 0, 11008, 17, 0, 11008, 17, 0, 12048, 572662306, 0, 12048, 572662306, 0, 12048, 572662306, 0, 12048, 572662306, 0, 12048, 572662306, 0, 12048, 572662306, 0, 12048, 572662306, 0, 12048, 572662306, 0, 12064, 572662306, 0, 12064, 572662306, 0, 12064, 572662306, 0, 12064, 572662306, 0, 12064, 572662306, 0, 12064, 572662306, 0, 12064, 572662306, 0, 12064, 572662306, 0, 12624, 572662306, 0, 12624, 572662306, 0, 12624, 572662306, 0, 12624, 572662306, 0, 12624, 572662306, 0, 12624, 572662306, 0, 12624, 572662306, 0, 12624, 572662306, 0, 12640, 572662306, 0, 12640, 572662306, 0, 12640, 572662306, 0, 12640, 572662306, 0, 12640, 572662306, 0, 12640, 572662306, 0, 12640, 572662306, 0, 12640, 572662306, 0, 14464, 4, 0, 16720, 4473920, 0, 16720, 4473920, 0, 16720, 4473920, 0, 16720, 
4473920, 0, 16720, 4473920, 0, 16736, 4473920, 0, 16736, 4473920, 0, 16736, 4473920, 0, 16736, 4473920, 0, 16736, 4473920, 0, 17472, 4194304, 0, 17920, 559240, 0, 17920, 559240, 0, 17920, 559240, 0, 17920, 559240, 0, 17920, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756364860249877930_24_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756364860249877930_24_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2313cb34 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756364860249877930_24_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 1856, 1024, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 0, 2496, 4290774015, 
0, 2496, 4290774015, 0, 2496, 4290774015, 0, 1856, 1024, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756364860460491484_25_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756364860460491484_25_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e8c7b206 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756364860460491484_25_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,277 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((142 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((237 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 25))) { + 
result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 330 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1207959553, 0, 1280, 1207959553, 0, 1280, 1207959553, 0, 2192, 1207959552, 0, 2192, 1207959552, 0, 2208, 1207959552, 0, 2208, 1207959552, 0, 2224, 1207959552, 0, 2224, 1207959552, 0, 3536, 1207959553, 0, 3536, 1207959553, 0, 3536, 1207959553, 0, 3540, 1207959553, 0, 3540, 1207959553, 0, 3540, 1207959553, 0, 3552, 1207959553, 0, 3552, 1207959553, 0, 3552, 1207959553, 0, 3556, 1207959553, 0, 3556, 1207959553, 0, 3556, 1207959553, 0, 3568, 1207959553, 0, 3568, 1207959553, 0, 3568, 1207959553, 0, 3572, 1207959553, 0, 3572, 1207959553, 0, 3572, 1207959553, 0, 4240, 1, 0, 4244, 1, 0, 4256, 1, 0, 4260, 1, 0, 4272, 1, 0, 4276, 1, 0, 4880, 1, 0, 4896, 1, 0, 4912, 1, 0, 5312, 262144, 0, 5760, 8, 0, 6400, 16, 0, 9728, 524416, 0, 9728, 524416, 0, 11328, 603979780, 0, 11328, 603979780, 0, 11328, 603979780, 0, 11344, 603979780, 0, 11344, 603979780, 0, 11344, 603979780, 0, 17280, 536871204, 0, 17280, 536871204, 0, 17280, 536871204, 0, 17280, 536871204, 0, 17296, 536871204, 0, 17296, 536871204, 0, 17296, 536871204, 0, 
17296, 536871204, 0, 1280, 1207959553, 0, 1280, 1207959553, 0, 1280, 1207959553, 0, 2192, 1207959552, 0, 2192, 1207959552, 0, 2208, 1207959552, 0, 2208, 1207959552, 0, 2224, 1207959552, 0, 2224, 1207959552, 0, 3536, 1207959553, 0, 3536, 1207959553, 0, 3536, 1207959553, 0, 3540, 1207959553, 0, 3540, 1207959553, 0, 3540, 1207959553, 0, 3552, 1207959553, 0, 3552, 1207959553, 0, 3552, 1207959553, 0, 3556, 1207959553, 0, 3556, 1207959553, 0, 3556, 1207959553, 0, 3568, 1207959553, 0, 3568, 1207959553, 0, 3568, 1207959553, 0, 3572, 1207959553, 0, 3572, 1207959553, 0, 3572, 1207959553, 0, 4240, 1, 0, 4244, 1, 0, 4256, 1, 0, 4260, 1, 0, 4272, 1, 0, 4276, 1, 0, 4880, 1, 0, 4896, 1, 0, 4912, 1, 0, 5312, 262144, 0, 5760, 8, 0, 6400, 16, 0, 9728, 524416, 0, 9728, 524416, 0, 11328, 603979780, 0, 11328, 603979780, 0, 11328, 603979780, 0, 11344, 603979780, 0, 11344, 603979780, 0, 11344, 603979780, 0, 17280, 536871204, 0, 17280, 536871204, 0, 17280, 536871204, 0, 17280, 536871204, 0, 17296, 536871204, 0, 17296, 536871204, 0, 17296, 536871204, 0, 17296, 536871204, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756364920407319181_29_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756364920407319181_29_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fd8da4e2 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756364920407319181_29_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,262 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 12)) { + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + 
if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 240 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2432, 1090785345, 0, 2432, 1090785345, 0, 2432, 1090785345, 0, 2432, 1090785345, 0, 2432, 1090785345, 0, 2432, 1090785345, 0, 4160, 136348168, 0, 4160, 136348168, 0, 4160, 136348168, 0, 4160, 136348168, 0, 4160, 136348168, 0, 5952, 272696336, 0, 5952, 272696336, 0, 5952, 272696336, 0, 5952, 272696336, 0, 5952, 272696336, 0, 6272, 613566756, 0, 6272, 613566756, 0, 6272, 613566756, 0, 6272, 613566756, 0, 6272, 613566756, 0, 6272, 613566756, 0, 6272, 613566756, 0, 6272, 613566756, 0, 6272, 613566756, 0, 6272, 613566756, 0, 6912, 85, 0, 6912, 85, 0, 6912, 85, 0, 6912, 85, 0, 10560, 536870944, 0, 10560, 536870944, 0, 10576, 536870944, 0, 10576, 536870944, 0, 11264, 32, 0, 11280, 32, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2432, 1090785345, 0, 2432, 1090785345, 0, 2432, 1090785345, 0, 2432, 1090785345, 0, 2432, 1090785345, 0, 2432, 1090785345, 0, 4160, 136348168, 0, 4160, 136348168, 0, 4160, 136348168, 0, 4160, 136348168, 0, 4160, 136348168, 0, 
5952, 272696336, 0, 5952, 272696336, 0, 5952, 272696336, 0, 5952, 272696336, 0, 5952, 272696336, 0, 6272, 613566756, 0, 6272, 613566756, 0, 6272, 613566756, 0, 6272, 613566756, 0, 6272, 613566756, 0, 6272, 613566756, 0, 6272, 613566756, 0, 6272, 613566756, 0, 6272, 613566756, 0, 6272, 613566756, 0, 6912, 85, 0, 6912, 85, 0, 6912, 85, 0, 6912, 85, 0, 10560, 536870944, 0, 10560, 536870944, 0, 10576, 536870944, 0, 10576, 536870944, 0, 11264, 32, 0, 11280, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756364923288090280_30_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756364923288090280_30_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ada63bf1 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756364923288090280_30_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,474 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 4))) { + result = 
(result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 27)) { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((149 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((168 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((178 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((187 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((192 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((196 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((207 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 22))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((296 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((305 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((310 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((319 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((330 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((341 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((352 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (363 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 20))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (408 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (418 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (427 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (450 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (459 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 582 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 7040, 536870912, 0, 7616, 536870912, 0, 8640, 1145324612, 0, 8640, 1145324612, 0, 8640, 1145324612, 0, 8640, 1145324612, 0, 8640, 1145324612, 0, 8640, 1145324612, 0, 8640, 1145324612, 0, 8640, 1145324612, 0, 10768, 2281701512, 0, 10768, 2281701512, 0, 10768, 2281701512, 0, 10768, 2281701512, 0, 10772, 2281701512, 0, 10772, 2281701512, 0, 10772, 2281701512, 0, 10772, 2281701512, 0, 10784, 2281701512, 0, 10784, 2281701512, 
0, 10784, 2281701512, 0, 10784, 2281701512, 0, 10788, 2281701512, 0, 10788, 2281701512, 0, 10788, 2281701512, 0, 10788, 2281701512, 0, 10800, 2281701512, 0, 10800, 2281701512, 0, 10800, 2281701512, 0, 10800, 2281701512, 0, 10804, 2281701512, 0, 10804, 2281701512, 0, 10804, 2281701512, 0, 10804, 2281701512, 0, 11408, 8, 0, 11412, 8, 0, 11424, 8, 0, 11428, 8, 0, 11440, 8, 0, 11444, 8, 0, 12304, 8390656, 0, 12304, 8390656, 0, 12308, 8390656, 0, 12308, 8390656, 0, 12320, 8390656, 0, 12320, 8390656, 0, 12324, 8390656, 0, 12324, 8390656, 0, 12336, 8390656, 0, 12336, 8390656, 0, 12340, 8390656, 0, 12340, 8390656, 0, 13264, 2147483784, 0, 13264, 2147483784, 0, 13264, 2147483784, 0, 13268, 2147483784, 0, 13268, 2147483784, 0, 13268, 2147483784, 0, 13280, 2147483784, 0, 13280, 2147483784, 0, 13280, 2147483784, 0, 13284, 2147483784, 0, 13284, 2147483784, 0, 13284, 2147483784, 0, 13296, 2147483784, 0, 13296, 2147483784, 0, 13296, 2147483784, 0, 13300, 2147483784, 0, 13300, 2147483784, 0, 13300, 2147483784, 0, 15744, 8388608, 0, 19856, 8388608, 0, 19872, 8388608, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 26112, 16384, 0, 26752, 1, 0, 28800, 269484032, 0, 28800, 269484032, 0, 576, 17, 0, 576, 17, 0, 7040, 536870912, 0, 7616, 536870912, 0, 8640, 1145324612, 0, 8640, 1145324612, 0, 8640, 1145324612, 0, 8640, 1145324612, 0, 8640, 1145324612, 0, 8640, 1145324612, 0, 8640, 1145324612, 0, 8640, 1145324612, 0, 10768, 2281701512, 0, 10768, 2281701512, 0, 10768, 2281701512, 0, 10768, 2281701512, 0, 10772, 2281701512, 0, 10772, 2281701512, 0, 10772, 2281701512, 0, 10772, 2281701512, 0, 10784, 2281701512, 0, 10784, 2281701512, 0, 10784, 
2281701512, 0, 10784, 2281701512, 0, 10788, 2281701512, 0, 10788, 2281701512, 0, 10788, 2281701512, 0, 10788, 2281701512, 0, 10800, 2281701512, 0, 10800, 2281701512, 0, 10800, 2281701512, 0, 10800, 2281701512, 0, 10804, 2281701512, 0, 10804, 2281701512, 0, 10804, 2281701512, 0, 10804, 2281701512, 0, 11408, 8, 0, 11412, 8, 0, 11424, 8, 0, 11428, 8, 0, 11440, 8, 0, 11444, 8, 0, 12304, 8390656, 0, 12304, 8390656, 0, 12308, 8390656, 0, 12308, 8390656, 0, 12320, 8390656, 0, 12320, 8390656, 0, 12324, 8390656, 0, 12324, 8390656, 0, 12336, 8390656, 0, 12336, 8390656, 0, 12340, 8390656, 0, 12340, 8390656, 0, 13264, 2147483784, 0, 13264, 2147483784, 0, 13264, 2147483784, 0, 13268, 2147483784, 0, 13268, 2147483784, 0, 13268, 2147483784, 0, 13280, 2147483784, 0, 13280, 2147483784, 0, 13280, 2147483784, 0, 13284, 2147483784, 0, 13284, 2147483784, 0, 13284, 2147483784, 0, 13296, 2147483784, 0, 13296, 2147483784, 0, 13296, 2147483784, 0, 13300, 2147483784, 0, 13300, 2147483784, 0, 13300, 2147483784, 0, 15744, 8388608, 0, 19856, 8388608, 0, 19872, 8388608, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 23232, 4278191103, 0, 26112, 16384, 0, 26752, 1, 0, 28800, 269484032, 0, 28800, 269484032, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 
0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756365104419593117_32_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756365104419593117_32_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..21c666bd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756365104419593117_32_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,146 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 22)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((48 << 6) | (i0 << 4)) | (i1 << 2)) | counter2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((100 << 6) | (i0 << 4)) | (i3 << 2)) | counter4); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((107 << 6) | (i0 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 600 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 6401, 537920640, 0, 6401, 537920640, 0, 6401, 537920640, 0, 6401, 537920640, 0, 6402, 537920640, 0, 6402, 537920640, 0, 6402, 537920640, 0, 6402, 537920640, 0, 6405, 537920640, 0, 6405, 537920640, 0, 6405, 537920640, 0, 6405, 537920640, 0, 6406, 537920640, 0, 6406, 537920640, 0, 6406, 537920640, 0, 6406, 537920640, 0, 6417, 537920640, 0, 6417, 537920640, 0, 6417, 537920640, 0, 6417, 537920640, 0, 6418, 537920640, 0, 6418, 537920640, 0, 6418, 537920640, 0, 6418, 537920640, 0, 6421, 537920640, 0, 6421, 537920640, 0, 6421, 537920640, 0, 6421, 537920640, 0, 6422, 537920640, 0, 6422, 537920640, 0, 6422, 537920640, 0, 6422, 537920640, 0, 6848, 134217728, 0, 6852, 134217728, 0, 6864, 134217728, 0, 6868, 134217728, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 
1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 6401, 537920640, 0, 6401, 537920640, 0, 6401, 537920640, 0, 6401, 537920640, 0, 6402, 537920640, 0, 6402, 537920640, 0, 6402, 537920640, 0, 6402, 537920640, 0, 6405, 537920640, 0, 6405, 537920640, 0, 6405, 537920640, 0, 6405, 537920640, 0, 6406, 537920640, 0, 6406, 537920640, 0, 6406, 537920640, 0, 6406, 537920640, 0, 6417, 537920640, 0, 6417, 537920640, 0, 6417, 537920640, 0, 6417, 537920640, 0, 6418, 537920640, 0, 6418, 537920640, 0, 6418, 537920640, 0, 6418, 537920640, 0, 6421, 537920640, 0, 6421, 537920640, 0, 6421, 537920640, 0, 6421, 537920640, 0, 6422, 537920640, 0, 6422, 537920640, 0, 6422, 537920640, 0, 6422, 537920640, 0, 6848, 134217728, 0, 6852, 134217728, 0, 6864, 134217728, 0, 6868, 134217728, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 
7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7424, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0, 7440, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756365162576943406_34_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756365162576943406_34_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6fbe2a6f --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756365162576943406_34_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,578 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 28)) { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + 
switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 29))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((255 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((273 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 1)) { + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (290 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (321 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (330 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (341 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((355 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (368 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (386 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((403 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((410 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (421 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 3) || 
(WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (432 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((448 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((457 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (472 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 600 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4928, 559240, 0, 4928, 559240, 0, 4928, 559240, 0, 4928, 559240, 0, 4928, 559240, 0, 6784, 272696336, 0, 6784, 272696336, 0, 6784, 272696336, 0, 6784, 272696336, 0, 6784, 272696336, 0, 7104, 
68174084, 0, 7104, 68174084, 0, 7104, 68174084, 0, 7104, 68174084, 0, 7104, 68174084, 0, 11904, 268501008, 0, 11904, 268501008, 0, 11904, 268501008, 0, 16336, 570425344, 0, 16336, 570425344, 0, 17492, 536870914, 0, 17492, 536870914, 0, 17496, 536870914, 0, 17496, 536870914, 0, 18560, 1145324612, 0, 18560, 1145324612, 0, 18560, 1145324612, 0, 18560, 1145324612, 0, 18560, 1145324612, 0, 18560, 1145324612, 0, 18560, 1145324612, 0, 18560, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20544, 279620, 0, 20544, 279620, 0, 20544, 279620, 0, 20544, 279620, 0, 20544, 279620, 0, 24704, 2147483648, 0, 25792, 2290089984, 0, 25792, 2290089984, 0, 25792, 2290089984, 0, 25808, 2290089984, 0, 25808, 2290089984, 0, 25808, 2290089984, 0, 25824, 2290089984, 0, 25824, 2290089984, 0, 25824, 2290089984, 0, 26240, 2290089984, 0, 26240, 2290089984, 0, 26240, 2290089984, 0, 26256, 2290089984, 0, 26256, 2290089984, 0, 26256, 2290089984, 0, 26272, 2290089984, 0, 26272, 2290089984, 0, 26272, 2290089984, 0, 26944, 2281701376, 0, 26944, 2281701376, 0, 27648, 8, 0, 28688, 559240, 0, 28688, 559240, 0, 28688, 559240, 0, 28688, 559240, 0, 28688, 559240, 0, 28704, 559240, 0, 28704, 559240, 0, 28704, 559240, 0, 28704, 559240, 0, 28704, 559240, 0, 28720, 559240, 0, 28720, 559240, 0, 28720, 559240, 0, 28720, 559240, 0, 28720, 559240, 0, 29264, 559240, 0, 29264, 559240, 0, 29264, 559240, 0, 29264, 559240, 0, 29264, 559240, 0, 29280, 559240, 0, 29280, 559240, 0, 29280, 559240, 0, 29280, 559240, 0, 29280, 559240, 0, 29296, 559240, 0, 29296, 559240, 0, 29296, 559240, 0, 29296, 559240, 0, 29296, 559240, 0, 30208, 2048, 0, 576, 17, 0, 576, 17, 0, 4928, 559240, 0, 4928, 559240, 0, 4928, 559240, 0, 4928, 559240, 0, 4928, 559240, 0, 6784, 272696336, 0, 6784, 272696336, 0, 6784, 272696336, 0, 6784, 272696336, 0, 6784, 272696336, 0, 7104, 68174084, 0, 7104, 68174084, 0, 
7104, 68174084, 0, 7104, 68174084, 0, 7104, 68174084, 0, 11904, 268501008, 0, 11904, 268501008, 0, 11904, 268501008, 0, 16336, 570425344, 0, 16336, 570425344, 0, 17492, 536870914, 0, 17492, 536870914, 0, 17496, 536870914, 0, 17496, 536870914, 0, 18560, 1145324612, 0, 18560, 1145324612, 0, 18560, 1145324612, 0, 18560, 1145324612, 0, 18560, 1145324612, 0, 18560, 1145324612, 0, 18560, 1145324612, 0, 18560, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20544, 279620, 0, 20544, 279620, 0, 20544, 279620, 0, 20544, 279620, 0, 20544, 279620, 0, 24704, 2147483648, 0, 25792, 2290089984, 0, 25792, 2290089984, 0, 25792, 2290089984, 0, 25808, 2290089984, 0, 25808, 2290089984, 0, 25808, 2290089984, 0, 25824, 2290089984, 0, 25824, 2290089984, 0, 25824, 2290089984, 0, 26240, 2290089984, 0, 26240, 2290089984, 0, 26240, 2290089984, 0, 26256, 2290089984, 0, 26256, 2290089984, 0, 26256, 2290089984, 0, 26272, 2290089984, 0, 26272, 2290089984, 0, 26272, 2290089984, 0, 26944, 2281701376, 0, 26944, 2281701376, 0, 27648, 8, 0, 28688, 559240, 0, 28688, 559240, 0, 28688, 559240, 0, 28688, 559240, 0, 28688, 559240, 0, 28704, 559240, 0, 28704, 559240, 0, 28704, 559240, 0, 28704, 559240, 0, 28704, 559240, 0, 28720, 559240, 0, 28720, 559240, 0, 28720, 559240, 0, 28720, 559240, 0, 28720, 559240, 0, 29264, 559240, 0, 29264, 559240, 0, 29264, 559240, 0, 29264, 559240, 0, 29264, 559240, 0, 29280, 559240, 0, 29280, 559240, 0, 29280, 559240, 0, 29280, 559240, 0, 29280, 559240, 0, 29296, 559240, 0, 29296, 559240, 0, 29296, 559240, 0, 29296, 559240, 0, 29296, 559240, 0, 30208, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + 
Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756365177639189142_35_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756365177639189142_35_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..84c2e5a8 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756365177639189142_35_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,380 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Output log: every lane that executes a wave op appends one 3-word record {id word, ballot.x, ballot.y}. */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the shared write cursor into _participant_bit; each record advances it by 3. */ + +[numthreads(64, 1, 1)] /* Divergent-control-flow test: lanes branch on WaveGetLaneIndex() and each wave op that runs logs which lanes were active with it. */ +void main(uint3 tid : SV_DispatchThreadID) { /* tid is not read; all branching is keyed on the lane index. */ + uint result = 0; /* Accumulates the wave-op results; it is never written to a buffer in this shader. */ + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* atomically reserve 3 consecutive words; temp receives the record's start index */ + _participant_bit[temp] = (9 << 6); /* id word: constant unique to this call site, shifted left by 6 */ + uint4 ballot = WaveActiveBallot(1); /* bit mask of the lanes active at this point */ + _participant_bit[(temp + 1)] = ballot.x; /* mask bits for lanes 0-31 */ + _participant_bit[(temp + 2)] = ballot.y; /* mask bits for lanes 32-63 */ + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; 
+ } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 19)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { /* not reachable: a uint % 2 is always 0 or 1, and both are handled above */ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i0 << 4)); /* inside a loop the counter is OR'ed in at bit 4, so each iteration logs its own id word */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 30))) { + if 
(((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 27))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((291 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((306 << 6) | (counter1 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (311 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { /* not reachable: a uint % 4 is always 0..3, and all four are handled above */ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (326 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 318 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 2560, 85, 0, 2560, 85, 0, 2560, 85, 0, 2560, 85, 0, 3200, 8, 0, 7104, 17, 0, 7104, 17, 0, 8320, 536870946, 0, 8320, 536870946, 0, 8320, 536870946, 0, 8336, 536870946, 0, 8336, 536870946, 0, 8336, 536870946, 0, 9472, 2, 0, 9488, 2, 0, 10304, 570425890, 0, 10304, 570425890, 0, 10304, 570425890, 0, 10304, 570425890, 0, 10304, 
570425890, 0, 10320, 570425890, 0, 10320, 570425890, 0, 10320, 570425890, 0, 10320, 570425890, 0, 10320, 570425890, 0, 10624, 1145324612, 0, 10624, 1145324612, 0, 10624, 1145324612, 0, 10624, 1145324612, 0, 10624, 1145324612, 0, 10624, 1145324612, 0, 10624, 1145324612, 0, 10624, 1145324612, 0, 11776, 2147483648, 0, 16704, 8, 0, 19904, 8390656, 0, 19904, 8390656, 0, 20608, 8, 0, 576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 2560, 85, 0, 2560, 85, 0, 2560, 85, 0, 2560, 85, 0, 3200, 8, 0, 7104, 17, 0, 7104, 17, 0, 8320, 536870946, 0, 8320, 536870946, 0, 8320, 536870946, 0, 8336, 536870946, 0, 8336, 536870946, 0, 8336, 536870946, 0, 9472, 2, 0, 9488, 2, 0, 10304, 570425890, 0, 10304, 570425890, 0, 10304, 570425890, 0, 10304, 570425890, 0, 10304, 570425890, 0, 10320, 570425890, 0, 10320, 570425890, 0, 10320, 570425890, 0, 10320, 570425890, 0, 10320, 570425890, 0, 10624, 1145324612, 0, 10624, 1145324612, 0, 10624, 1145324612, 0, 10624, 1145324612, 0, 10624, 1145324612, 0, 10624, 1145324612, 0, 10624, 1145324612, 0, 10624, 1145324612, 0, 11776, 2147483648, 0, 16704, 8, 0, 19904, 8390656, 0, 19904, 8390656, 0, 20608, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756365526488050213_39_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756365526488050213_39_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..39c176f5 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756365526488050213_39_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,215 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Output log: every lane that executes a wave op appends one 3-word record {id word, ballot.x, ballot.y}. */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the shared write cursor into _participant_bit; each record advances it by 3. */ + +[numthreads(64, 1, 1)] /* Divergent-control-flow test: lanes branch on WaveGetLaneIndex() and each wave op that runs logs which lanes were active with it. */ +void main(uint3 tid : SV_DispatchThreadID) { /* tid is not read; all branching is keyed on the lane index. */ + uint result = 0; /* Accumulates the wave-op results; it is never written to a buffer in this shader. */ + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* atomically reserve 3 consecutive words; temp receives the record's start index */ + _participant_bit[temp] = (9 << 6); /* id word: constant unique to this call site, shifted left by 6 */ + uint4 ballot = WaveActiveBallot(1); /* bit mask of the lanes active at this point */ + _participant_bit[(temp + 1)] = ballot.x; /* mask bits for lanes 0-31 */ + _participant_bit[(temp + 2)] = ballot.y; /* mask bits for lanes 32-63 */ + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { /* not reachable: a uint % 4 is always 0..3, and all four are handled above */ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2176, 537002016, 0, 2176, 537002016, 0, 2176, 537002016, 0, 2496, 1145324612, 0, 2496, 1145324612, 0, 2496, 1145324612, 0, 2496, 1145324612, 0, 2496, 
1145324612, 0, 2496, 1145324612, 0, 2496, 1145324612, 0, 2496, 1145324612, 0, 2944, 559240, 0, 2944, 559240, 0, 2944, 559240, 0, 2944, 559240, 0, 2944, 559240, 0, 3840, 73, 0, 3840, 73, 0, 3840, 73, 0, 5056, 272696336, 0, 5056, 272696336, 0, 5056, 272696336, 0, 5056, 272696336, 0, 5056, 272696336, 0, 5696, 613566756, 0, 5696, 613566756, 0, 5696, 613566756, 0, 5696, 613566756, 0, 5696, 613566756, 0, 5696, 613566756, 0, 5696, 613566756, 0, 5696, 613566756, 0, 5696, 613566756, 0, 5696, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756365528190012995_40_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756365528190012995_40_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e0dc74ed --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756365528190012995_40_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,317 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 23))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || 
(WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 23))) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 2))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((250 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((261 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 3136, 572662306, 0, 3136, 572662306, 0, 3136, 572662306, 0, 3136, 572662306, 0, 3136, 572662306, 0, 3136, 572662306, 0, 3136, 572662306, 0, 3136, 572662306, 0, 3584, 699050, 0, 3584, 699050, 
0, 3584, 699050, 0, 3584, 699050, 0, 3584, 699050, 0, 3584, 699050, 0, 3584, 699050, 0, 3584, 699050, 0, 3584, 699050, 0, 3584, 699050, 0, 13760, 2, 0, 14848, 8388608, 0, 14864, 8388608, 0, 17280, 8388608, 0, 17296, 8388608, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 1600, 2863311530, 0, 3136, 572662306, 0, 3136, 572662306, 0, 3136, 572662306, 0, 3136, 572662306, 0, 3136, 572662306, 0, 3136, 572662306, 0, 3136, 572662306, 0, 3136, 572662306, 0, 3584, 699050, 0, 3584, 699050, 0, 3584, 699050, 0, 3584, 699050, 0, 3584, 699050, 0, 3584, 699050, 0, 3584, 699050, 0, 3584, 699050, 0, 3584, 699050, 0, 3584, 699050, 0, 13760, 2, 0, 14848, 8388608, 0, 14864, 8388608, 0, 17280, 8388608, 0, 17296, 8388608, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756366109042143295_42_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756366109042143295_42_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8045b81c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756366109042143295_42_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,153 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((78 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 1344, 268501008, 0, 1344, 
268501008, 0, 1344, 268501008, 0, 3328, 256, 0, 3344, 256, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6976, 559240, 0, 6976, 559240, 0, 6976, 559240, 0, 6976, 559240, 0, 6976, 559240, 0, 768, 1, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1344, 268501008, 0, 3328, 256, 0, 3344, 256, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6976, 559240, 0, 6976, 559240, 0, 6976, 559240, 0, 6976, 559240, 0, 6976, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756366111071107251_43_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756366111071107251_43_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..44b732ab --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756366111071107251_43_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,349 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((82 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((129 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((149 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((163 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((174 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 27))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((244 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((260 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((269 
<< 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((290 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (313 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 30)) { + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (328 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 29))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (362 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (367 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (374 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (378 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1134 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1616, 8, 0, 1632, 8, 0, 3348, 2818572330, 0, 3348, 2818572330, 0, 3348, 2818572330, 0, 3348, 2818572330, 0, 3348, 2818572330, 0, 3348, 2818572330, 0, 3352, 2818572330, 0, 3352, 2818572330, 0, 3352, 2818572330, 0, 3352, 2818572330, 0, 3352, 2818572330, 0, 3352, 2818572330, 0, 3364, 2818572330, 0, 3364, 2818572330, 0, 3364, 2818572330, 0, 3364, 2818572330, 0, 3364, 2818572330, 0, 3364, 2818572330, 0, 3368, 2818572330, 0, 3368, 2818572330, 0, 3368, 2818572330, 0, 3368, 2818572330, 0, 3368, 2818572330, 0, 3368, 2818572330, 0, 4052, 2818572298, 0, 4052, 2818572298, 0, 4052, 2818572298, 0, 4052, 2818572298, 0, 4052, 2818572298, 0, 4056, 2818572298, 0, 
4056, 2818572298, 0, 4056, 2818572298, 0, 4056, 2818572298, 0, 4056, 2818572298, 0, 4068, 2818572298, 0, 4068, 2818572298, 0, 4068, 2818572298, 0, 4068, 2818572298, 0, 4068, 2818572298, 0, 4072, 2818572298, 0, 4072, 2818572298, 0, 4072, 2818572298, 0, 4072, 2818572298, 0, 4072, 2818572298, 0, 6612, 2097152, 0, 6616, 2097152, 0, 6628, 2097152, 0, 6632, 2097152, 0, 8256, 136348168, 0, 8256, 136348168, 0, 8256, 136348168, 0, 8256, 136348168, 0, 8256, 136348168, 0, 8272, 136348168, 0, 8272, 136348168, 0, 8272, 136348168, 0, 8272, 136348168, 0, 8272, 136348168, 0, 9540, 73, 0, 9540, 73, 0, 9540, 73, 0, 9544, 73, 0, 9544, 73, 0, 9544, 73, 0, 9556, 73, 0, 9556, 73, 0, 9556, 73, 0, 9560, 73, 0, 9560, 73, 0, 9560, 73, 0, 10436, 1090785345, 0, 10436, 1090785345, 0, 10436, 1090785345, 0, 10436, 1090785345, 0, 10436, 1090785345, 0, 10436, 1090785345, 0, 10440, 1090785345, 0, 10440, 1090785345, 0, 10440, 1090785345, 0, 10440, 1090785345, 0, 10440, 1090785345, 0, 10440, 1090785345, 0, 10452, 1090785345, 0, 10452, 1090785345, 0, 10452, 1090785345, 0, 10452, 1090785345, 0, 10452, 1090785345, 0, 10452, 1090785345, 0, 10456, 1090785345, 0, 10456, 1090785345, 0, 10456, 1090785345, 0, 10456, 1090785345, 0, 10456, 1090785345, 0, 10456, 1090785345, 0, 11140, 1207959625, 0, 11140, 1207959625, 0, 11140, 1207959625, 0, 11140, 1207959625, 0, 11140, 1207959625, 0, 11144, 1207959625, 0, 11144, 1207959625, 0, 11144, 1207959625, 0, 11144, 1207959625, 0, 11144, 1207959625, 0, 11156, 1207959625, 0, 11156, 1207959625, 0, 11156, 1207959625, 0, 11156, 1207959625, 0, 11156, 1207959625, 0, 11160, 1207959625, 0, 11160, 1207959625, 0, 11160, 1207959625, 0, 11160, 1207959625, 0, 11160, 1207959625, 0, 11712, 272696336, 0, 11712, 272696336, 0, 11712, 272696336, 0, 11712, 272696336, 0, 11712, 272696336, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 
3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 16660, 671219712, 0, 16660, 671219712, 0, 16660, 671219712, 0, 16664, 671219712, 0, 16664, 671219712, 0, 16664, 671219712, 0, 16676, 671219712, 0, 16676, 671219712, 0, 16676, 671219712, 0, 16680, 671219712, 0, 16680, 671219712, 0, 16680, 671219712, 0, 17236, 671219712, 0, 17236, 671219712, 0, 17236, 671219712, 0, 17240, 671219712, 0, 17240, 671219712, 0, 17240, 671219712, 0, 17252, 671219712, 0, 17252, 671219712, 0, 17252, 671219712, 0, 17256, 671219712, 0, 17256, 671219712, 0, 17256, 671219712, 0, 18576, 64, 0, 18592, 64, 0, 20032, 134217728, 0, 23488, 1145324548, 0, 23488, 1145324548, 0, 23488, 1145324548, 0, 23488, 1145324548, 0, 23488, 1145324548, 0, 23488, 1145324548, 0, 23488, 1145324548, 0, 23936, 838796, 0, 23936, 838796, 0, 23936, 838796, 0, 23936, 838796, 0, 23936, 838796, 0, 23936, 838796, 0, 23936, 838796, 0, 23936, 838796, 0, 23936, 838796, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1616, 8, 0, 1632, 8, 0, 3348, 2818572330, 0, 3348, 2818572330, 0, 3348, 2818572330, 0, 3348, 2818572330, 0, 3348, 2818572330, 0, 3348, 2818572330, 0, 3352, 2818572330, 0, 3352, 2818572330, 0, 3352, 2818572330, 0, 3352, 2818572330, 0, 3352, 2818572330, 0, 3352, 2818572330, 0, 3364, 2818572330, 0, 3364, 2818572330, 0, 3364, 2818572330, 0, 3364, 2818572330, 0, 3364, 2818572330, 0, 3364, 2818572330, 0, 3368, 2818572330, 0, 3368, 2818572330, 0, 3368, 2818572330, 0, 3368, 2818572330, 0, 3368, 2818572330, 0, 3368, 2818572330, 0, 4052, 2818572298, 0, 4052, 2818572298, 0, 4052, 2818572298, 0, 4052, 2818572298, 0, 4052, 2818572298, 0, 4056, 2818572298, 0, 4056, 2818572298, 0, 4056, 2818572298, 0, 4056, 2818572298, 0, 4056, 2818572298, 0, 4068, 2818572298, 0, 4068, 2818572298, 0, 4068, 2818572298, 0, 4068, 2818572298, 0, 
4068, 2818572298, 0, 4072, 2818572298, 0, 4072, 2818572298, 0, 4072, 2818572298, 0, 4072, 2818572298, 0, 4072, 2818572298, 0, 6612, 2097152, 0, 6616, 2097152, 0, 6628, 2097152, 0, 6632, 2097152, 0, 8256, 136348168, 0, 8256, 136348168, 0, 8256, 136348168, 0, 8256, 136348168, 0, 8256, 136348168, 0, 8272, 136348168, 0, 8272, 136348168, 0, 8272, 136348168, 0, 8272, 136348168, 0, 8272, 136348168, 0, 9540, 73, 0, 9540, 73, 0, 9540, 73, 0, 9544, 73, 0, 9544, 73, 0, 9544, 73, 0, 9556, 73, 0, 9556, 73, 0, 9556, 73, 0, 9560, 73, 0, 9560, 73, 0, 9560, 73, 0, 10436, 1090785345, 0, 10436, 1090785345, 0, 10436, 1090785345, 0, 10436, 1090785345, 0, 10436, 1090785345, 0, 10436, 1090785345, 0, 10440, 1090785345, 0, 10440, 1090785345, 0, 10440, 1090785345, 0, 10440, 1090785345, 0, 10440, 1090785345, 0, 10440, 1090785345, 0, 10452, 1090785345, 0, 10452, 1090785345, 0, 10452, 1090785345, 0, 10452, 1090785345, 0, 10452, 1090785345, 0, 10452, 1090785345, 0, 10456, 1090785345, 0, 10456, 1090785345, 0, 10456, 1090785345, 0, 10456, 1090785345, 0, 10456, 1090785345, 0, 10456, 1090785345, 0, 11140, 1207959625, 0, 11140, 1207959625, 0, 11140, 1207959625, 0, 11140, 1207959625, 0, 11140, 1207959625, 0, 11144, 1207959625, 0, 11144, 1207959625, 0, 11144, 1207959625, 0, 11144, 1207959625, 0, 11144, 1207959625, 0, 11156, 1207959625, 0, 11156, 1207959625, 0, 11156, 1207959625, 0, 11156, 1207959625, 0, 11156, 1207959625, 0, 11160, 1207959625, 0, 11160, 1207959625, 0, 11160, 1207959625, 0, 11160, 1207959625, 0, 11160, 1207959625, 0, 11712, 272696336, 0, 11712, 272696336, 0, 11712, 272696336, 0, 11712, 272696336, 0, 11712, 272696336, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 
0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 12032, 3067833782, 0, 16660, 671219712, 0, 16660, 671219712, 0, 16660, 671219712, 0, 16664, 671219712, 0, 16664, 671219712, 0, 16664, 671219712, 0, 16676, 671219712, 0, 16676, 671219712, 0, 16676, 671219712, 0, 16680, 671219712, 0, 16680, 671219712, 0, 16680, 671219712, 0, 17236, 671219712, 0, 17236, 671219712, 0, 17236, 671219712, 0, 17240, 671219712, 0, 17240, 671219712, 0, 17240, 671219712, 0, 17252, 671219712, 0, 17252, 671219712, 0, 17252, 671219712, 0, 17256, 671219712, 0, 17256, 671219712, 0, 17256, 671219712, 0, 18576, 64, 0, 18592, 64, 0, 20032, 134217728, 0, 23488, 1145324548, 0, 23488, 1145324548, 0, 23488, 1145324548, 0, 23488, 1145324548, 0, 23488, 1145324548, 0, 23488, 1145324548, 0, 23488, 1145324548, 0, 23936, 838796, 0, 23936, 838796, 0, 23936, 838796, 0, 23936, 838796, 0, 23936, 838796, 0, 23936, 838796, 0, 23936, 838796, 0, 23936, 838796, 0, 23936, 838796, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756366164839457268_44_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756366164839457268_44_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2021ea09 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756366164839457268_44_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,167 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 510 + - 
Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 73, 0, 1792, 73, 0, 1792, 73, 0, 2752, 18, 0, 2752, 18, 0, 2768, 18, 0, 2768, 18, 0, 4420, 272696336, 0, 4420, 272696336, 0, 4420, 272696336, 0, 4420, 272696336, 0, 4420, 272696336, 0, 4424, 272696336, 0, 4424, 272696336, 0, 4424, 272696336, 0, 4424, 272696336, 0, 4424, 272696336, 0, 4436, 272696336, 0, 4436, 272696336, 0, 4436, 272696336, 0, 4436, 272696336, 0, 4436, 272696336, 0, 4440, 272696336, 0, 4440, 272696336, 0, 4440, 272696336, 0, 4440, 272696336, 0, 4440, 272696336, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 5248, 613566756, 0, 5248, 613566756, 0, 5248, 613566756, 0, 5248, 613566756, 0, 5248, 613566756, 0, 5248, 613566756, 0, 5248, 613566756, 0, 5248, 613566756, 0, 5248, 613566756, 0, 5248, 613566756, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 73, 0, 1792, 73, 0, 1792, 73, 0, 2752, 18, 0, 2752, 18, 0, 2768, 18, 0, 2768, 18, 0, 4420, 272696336, 0, 4420, 272696336, 0, 4420, 272696336, 0, 4420, 272696336, 0, 4420, 272696336, 0, 4424, 272696336, 0, 4424, 
272696336, 0, 4424, 272696336, 0, 4424, 272696336, 0, 4424, 272696336, 0, 4436, 272696336, 0, 4436, 272696336, 0, 4436, 272696336, 0, 4436, 272696336, 0, 4436, 272696336, 0, 4440, 272696336, 0, 4440, 272696336, 0, 4440, 272696336, 0, 4440, 272696336, 0, 4440, 272696336, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4740, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4744, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4756, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 4760, 2454267026, 0, 5248, 613566756, 0, 5248, 613566756, 0, 5248, 613566756, 0, 5248, 613566756, 0, 5248, 613566756, 0, 5248, 613566756, 0, 5248, 613566756, 0, 5248, 613566756, 0, 5248, 613566756, 0, 5248, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756366340497953672_46_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756366340497953672_46_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fd697d9b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756366340497953672_46_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,88 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((29 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads 
+Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 264 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 1856, 67108864, 0, 1860, 67108864, 0, 1864, 67108864, 0, 1872, 67108864, 0, 1876, 67108864, 0, 1880, 67108864, 0, 2304, 8, 0, 2308, 8, 0, 2312, 8, 0, 2320, 8, 0, 2324, 8, 0, 2328, 8, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 1856, 67108864, 0, 1860, 67108864, 0, 1864, 67108864, 0, 1872, 67108864, 0, 1876, 67108864, 0, 1880, 67108864, 0, 2304, 8, 0, 2308, 8, 0, 2312, 8, 0, 2320, 8, 0, 2324, 8, 0, 2328, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756366345989049687_47_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756366345989049687_47_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..08236a73 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756366345989049687_47_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,342 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if 
((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 3: { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 6))) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 17))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if 
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 31))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() == 27)) { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (321 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 
+ Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 11456, 136, 0, 11456, 136, 0, 20096, 134217728, 0, 576, 17, 0, 576, 17, 0, 11456, 136, 0, 11456, 136, 0, 20096, 134217728, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756366354242603520_48_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756366354242603520_48_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9d0ab640 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756366354242603520_48_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,186 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 20))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4608, 68, 0, 4608, 68, 0, 5632, 559240, 0, 5632, 559240, 0, 5632, 559240, 0, 5632, 559240, 0, 5632, 559240, 0, 6272, 73, 0, 6272, 73, 0, 6272, 73, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 7168, 613566756, 0, 7168, 613566756, 0, 7168, 613566756, 0, 7168, 
613566756, 0, 7168, 613566756, 0, 7168, 613566756, 0, 7168, 613566756, 0, 7168, 613566756, 0, 7168, 613566756, 0, 7168, 613566756, 0, 576, 17, 0, 576, 17, 0, 4608, 68, 0, 4608, 68, 0, 5632, 559240, 0, 5632, 559240, 0, 5632, 559240, 0, 5632, 559240, 0, 5632, 559240, 0, 6272, 73, 0, 6272, 73, 0, 6272, 73, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 6848, 1363481681, 0, 7168, 613566756, 0, 7168, 613566756, 0, 7168, 613566756, 0, 7168, 613566756, 0, 7168, 613566756, 0, 7168, 613566756, 0, 7168, 613566756, 0, 7168, 613566756, 0, 7168, 613566756, 0, 7168, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756366355252492224_49_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756366355252492224_49_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f74e9e7e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756366355252492224_49_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,146 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2624, 1, 0, 2640, 1, 0, 2656, 1, 0, 3200, 272696336, 0, 3200, 272696336, 0, 3200, 272696336, 0, 3200, 272696336, 0, 3200, 272696336, 0, 2624, 1, 0, 2640, 1, 0, 2656, 1, 0, 3200, 272696336, 0, 3200, 272696336, 0, 3200, 272696336, 0, 3200, 272696336, 0, 3200, 
272696336, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756366359501327348_51_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756366359501327348_51_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a6499cb0 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756366359501327348_51_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,241 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + 
switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 294 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2752, 1090785345, 0, 2752, 1090785345, 0, 2752, 1090785345, 0, 2752, 1090785345, 0, 2752, 1090785345, 0, 2752, 1090785345, 0, 2768, 1090785345, 0, 2768, 1090785345, 0, 2768, 1090785345, 0, 2768, 1090785345, 0, 2768, 1090785345, 0, 2768, 1090785345, 0, 3648, 134217728, 0, 3664, 134217728, 0, 4096, 1073741824, 0, 4112, 1073741824, 0, 6784, 613566756, 0, 6784, 613566756, 0, 6784, 613566756, 0, 6784, 613566756, 0, 6784, 613566756, 0, 6784, 613566756, 0, 6784, 613566756, 0, 6784, 613566756, 0, 6784, 613566756, 0, 6784, 613566756, 0, 7888, 585, 0, 7888, 585, 0, 7888, 585, 0, 7888, 585, 0, 8640, 272696336, 0, 8640, 272696336, 0, 8640, 272696336, 0, 8640, 272696336, 0, 8640, 272696336, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2752, 1090785345, 0, 2752, 1090785345, 0, 2752, 1090785345, 0, 2752, 1090785345, 0, 2752, 1090785345, 0, 2752, 1090785345, 0, 2768, 1090785345, 0, 2768, 1090785345, 0, 2768, 1090785345, 0, 2768, 1090785345, 0, 2768, 1090785345, 0, 2768, 1090785345, 0, 3648, 134217728, 0, 3664, 134217728, 0, 4096, 1073741824, 
0, 4112, 1073741824, 0, 6784, 613566756, 0, 6784, 613566756, 0, 6784, 613566756, 0, 6784, 613566756, 0, 6784, 613566756, 0, 6784, 613566756, 0, 6784, 613566756, 0, 6784, 613566756, 0, 6784, 613566756, 0, 6784, 613566756, 0, 7888, 585, 0, 7888, 585, 0, 7888, 585, 0, 7888, 585, 0, 8640, 272696336, 0, 8640, 272696336, 0, 8640, 272696336, 0, 8640, 272696336, 0, 8640, 272696336, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756366365821115344_53_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756366365821115344_53_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1dd0b41a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756366365821115344_53_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,387 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 24))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 24))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 25))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 28))) { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 25))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((302 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((311 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((326 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((333 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (337 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (352 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (362 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((392 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2560, 67108864, 0, 2576, 67108864, 0, 2592, 67108864, 0, 3648, 67108880, 0, 3648, 67108880, 0, 3652, 67108880, 0, 3652, 67108880, 0, 3664, 67108880, 0, 3664, 67108880, 0, 3668, 67108880, 0, 3668, 67108880, 0, 3680, 
67108880, 0, 3680, 67108880, 0, 3684, 67108880, 0, 3684, 67108880, 0, 5504, 2097152, 0, 6080, 272696320, 0, 6080, 272696320, 0, 6080, 272696320, 0, 6080, 272696320, 0, 6400, 546457892, 0, 6400, 546457892, 0, 6400, 546457892, 0, 6400, 546457892, 0, 6400, 546457892, 0, 6400, 546457892, 0, 6400, 546457892, 0, 6400, 546457892, 0, 6400, 546457892, 0, 17472, 32, 0, 23168, 85, 0, 23168, 85, 0, 23168, 85, 0, 23168, 85, 0, 25104, 32768, 0, 2560, 67108864, 0, 2576, 67108864, 0, 2592, 67108864, 0, 3648, 67108880, 0, 3648, 67108880, 0, 3652, 67108880, 0, 3652, 67108880, 0, 3664, 67108880, 0, 3664, 67108880, 0, 3668, 67108880, 0, 3668, 67108880, 0, 3680, 67108880, 0, 3680, 67108880, 0, 3684, 67108880, 0, 3684, 67108880, 0, 5504, 2097152, 0, 6080, 272696320, 0, 6080, 272696320, 0, 6080, 272696320, 0, 6080, 272696320, 0, 6400, 546457892, 0, 6400, 546457892, 0, 6400, 546457892, 0, 6400, 546457892, 0, 6400, 546457892, 0, 6400, 546457892, 0, 6400, 546457892, 0, 6400, 546457892, 0, 6400, 546457892, 0, 17472, 32, 0, 23168, 85, 0, 23168, 85, 0, 23168, 85, 0, 23168, 85, 0, 25104, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756366371139605191_54_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756366371139605191_54_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..74a19732 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756366371139605191_54_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,324 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 27))) { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 18))) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((70 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + } else { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 8))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() 
== 7) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 18))) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((258 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 16)) || 
(WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 31))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((298 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((317 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((328 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((358 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 30)) || 
(WaveGetLaneIndex() == 25))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((400 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((419 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((430 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((457 << 6) | (counter3 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((472 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 20))) { + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((508 << 6) | (i6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 1)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((536 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i6 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (543 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 270 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2304, 134217728, 0, 2320, 134217728, 0, 2336, 134217728, 0, 
6528, 134217728, 0, 6544, 134217728, 0, 6560, 134217728, 0, 9856, 268435456, 0, 11840, 128, 0, 11856, 128, 0, 11872, 128, 0, 14864, 16809984, 0, 14864, 16809984, 0, 14880, 16809984, 0, 14880, 16809984, 0, 21008, 134217728, 0, 21024, 134217728, 0, 22928, 33554434, 0, 22928, 33554434, 0, 22944, 33554434, 0, 22944, 33554434, 0, 26832, 33554432, 0, 26836, 33554432, 0, 26848, 33554432, 0, 26852, 33554432, 0, 27536, 33554432, 0, 27540, 33554432, 0, 27552, 33554432, 0, 27556, 33554432, 0, 29264, 16, 0, 29268, 16, 0, 29272, 16, 0, 29280, 16, 0, 29284, 16, 0, 29288, 16, 0, 30224, 8192, 0, 30240, 8192, 0, 32512, 8192, 0, 32516, 8192, 0, 32528, 8192, 0, 32532, 8192, 0, 32544, 8192, 0, 32548, 8192, 0, 34304, 8192, 0, 34320, 8192, 0, 34336, 8192, 0, 2304, 134217728, 0, 2320, 134217728, 0, 2336, 134217728, 0, 6528, 134217728, 0, 6544, 134217728, 0, 6560, 134217728, 0, 9856, 268435456, 0, 11840, 128, 0, 11856, 128, 0, 11872, 128, 0, 14864, 16809984, 0, 14864, 16809984, 0, 14880, 16809984, 0, 14880, 16809984, 0, 21008, 134217728, 0, 21024, 134217728, 0, 22928, 33554434, 0, 22928, 33554434, 0, 22944, 33554434, 0, 22944, 33554434, 0, 26832, 33554432, 0, 26836, 33554432, 0, 26848, 33554432, 0, 26852, 33554432, 0, 27536, 33554432, 0, 27540, 33554432, 0, 27552, 33554432, 0, 27556, 33554432, 0, 29264, 16, 0, 29268, 16, 0, 29272, 16, 0, 29280, 16, 0, 29284, 16, 0, 29288, 16, 0, 30224, 8192, 0, 30240, 8192, 0, 32512, 8192, 0, 32516, 8192, 0, 32528, 8192, 0, 32532, 8192, 0, 32544, 8192, 0, 32548, 8192, 0, 34304, 8192, 0, 34320, 8192, 0, 34336, 8192, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756366463283673959_55_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756366463283673959_55_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4f2e2e1d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756366463283673959_55_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,108 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: 3 uints per record = {tag, ballot.x, ballot.y} */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free slot in _participant_bit; advanced by 3 per record */ + +[numthreads(64, 1, 1)] /* Entry point. Each lane that executes a wave op reserves 3 slots via InterlockedAdd and stores tag = (constant << 6), OR'd with (loop counter << 4) inside a loop, then the first two words of WaveActiveBallot(1). `result` is only accumulated locally and never stored. */ +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 5))) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(5)); + uint temp =
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } /* case 0 has no break: execution continues into case 1; the expected data (tag 77 << 6 = 4928, mask 0x55555555) is consistent with that */ + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0, 4928, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding:
+ Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756367044201548140_61_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756367044201548140_61_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2ce6b2cd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756367044201548140_61_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,352 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: 3 uints per record = {tag, ballot.x, ballot.y} */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free slot in _participant_bit; advanced by 3 per record */ + +[numthreads(64, 1, 1)] /* Entry point. Each lane that executes a wave op reserves 3 slots via InterlockedAdd and stores tag = (constant << 6), OR'd with (loop counter << 4) inside a loop, then the first two words of WaveActiveBallot(1). `result` is only accumulated locally and never stored. */ +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result +
WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + }
else { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; +
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 22)) { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 16))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; +
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 306 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 3328, 8390656, 0, 3328, 8390656, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272,
2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6016, 262404, 0, 6016, 262404, 0, 6016, 262404, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 7104, 85, 0, 7104, 85, 0, 7104, 85, 0, 7104, 85, 0, 9280, 8, 0, 15872, 2080, 0, 15872, 2080, 0, 576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 3328, 8390656, 0, 3328, 8390656, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6272, 2863311530, 0, 6016, 262404, 0, 6016, 262404, 0, 6016, 262404, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 5632, 1431393361, 0, 7104, 85, 0, 7104, 85, 0, 7104, 85, 0, 7104, 85, 0, 9280, 8, 0, 15872, 2080, 0, 15872, 2080, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756367049328128399_62_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756367049328128399_62_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e563dcba --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756367049328128399_62_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,393 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: 3 uints per record = {tag, ballot.x, ballot.y} */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free slot in _participant_bit; advanced by 3 per record */ + +[numthreads(64, 1, 1)] /* Entry point. Each lane that executes a wave op reserves 3 slots via InterlockedAdd and stores tag = (constant << 6), OR'd with (loop counter << 4) inside a loop, then the first two words of WaveActiveBallot(1). `result` is only accumulated locally and never stored. */ +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; +
_participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch
((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; +
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (292 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 21))) { + result = (result +
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((311 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((322 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (330 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (334 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 276 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 73, 0, 1792, 73, 0, 1792, 73, 0, 3392, 268443650, 0, 3392, 268443650, 0, 3392, 268443650, 0, 3408, 268443650, 0, 3408, 268443650, 0, 3408, 268443650, 0, 11712, 1048580, 0, 11712, 1048580, 0, 12800, 73, 0, 12800, 73, 0, 12800, 73, 0, 19904, 2453667842, 0, 19904, 2453667842, 0, 19904, 2453667842, 0, 19904, 2453667842, 0, 19904, 2453667842, 0, 19920, 2453667842, 0, 19920,
2453667842, 0, 19920, 2453667842, 0, 19920, 2453667842, 0, 19920, 2453667842, 0, 20608, 2449473538, 0, 20608, 2449473538, 0, 20608, 2449473538, 0, 20608, 2449473538, 0, 20624, 2449473538, 0, 20624, 2449473538, 0, 20624, 2449473538, 0, 20624, 2449473538, 0, 21120, 613566756, 0, 21120, 613566756, 0, 21120, 613566756, 0, 21120, 613566756, 0, 21120, 613566756, 0, 21120, 613566756, 0, 21120, 613566756, 0, 21120, 613566756, 0, 21120, 613566756, 0, 21120, 613566756, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 73, 0, 1792, 73, 0, 1792, 73, 0, 3392, 268443650, 0, 3392, 268443650, 0, 3392, 268443650, 0, 3408, 268443650, 0, 3408, 268443650, 0, 3408, 268443650, 0, 11712, 1048580, 0, 11712, 1048580, 0, 12800, 73, 0, 12800, 73, 0, 12800, 73, 0, 19904, 2453667842, 0, 19904, 2453667842, 0, 19904, 2453667842, 0, 19904, 2453667842, 0, 19904, 2453667842, 0, 19920, 2453667842, 0, 19920, 2453667842, 0, 19920, 2453667842, 0, 19920, 2453667842, 0, 19920, 2453667842, 0, 20608, 2449473538, 0, 20608, 2449473538, 0, 20608, 2449473538, 0, 20608, 2449473538, 0, 20624, 2449473538, 0, 20624, 2449473538, 0, 20624, 2449473538, 0, 20624, 2449473538, 0, 21120, 613566756, 0, 21120, 613566756, 0, 21120, 613566756, 0, 21120, 613566756, 0, 21120, 613566756, 0, 21120, 613566756, 0, 21120, 613566756, 0, 21120, 613566756, 0, 21120, 613566756, 0, 21120, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756367337056906129_65_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756367337056906129_65_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8a4d0295 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756367337056906129_65_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,120 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 26))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 16))) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5248, 1073741824, 0, 5264, 1073741824, 0, 5280, 1073741824, 0, 6016, 272696336, 0, 6016, 272696336, 0, 6016, 272696336, 0, 6016, 272696336, 0, 6016, 272696336, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 5248, 1073741824, 0, 5264, 1073741824, 0, 5280, 1073741824, 0, 6016, 272696336, 0, 6016, 272696336, 0, 6016, 272696336, 0, 6016, 272696336, 0, 6016, 272696336, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0] + - 
Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756367337936336881_66_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756367337936336881_66_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6ff58e04 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756367337936336881_66_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,200 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 276 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1664, 65, 0, 1664, 65, 0, 2240, 
272696336, 0, 2240, 272696336, 0, 2240, 272696336, 0, 2240, 272696336, 0, 2240, 272696336, 0, 2560, 68174084, 0, 2560, 68174084, 0, 2560, 68174084, 0, 2560, 68174084, 0, 2560, 68174084, 0, 5440, 73, 0, 5440, 73, 0, 5440, 73, 0, 6016, 272696336, 0, 6016, 272696336, 0, 6016, 272696336, 0, 6016, 272696336, 0, 6016, 272696336, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1664, 65, 0, 1664, 65, 0, 2240, 272696336, 0, 2240, 272696336, 0, 2240, 272696336, 0, 2240, 272696336, 0, 2240, 272696336, 0, 2560, 68174084, 0, 2560, 68174084, 0, 2560, 68174084, 0, 2560, 68174084, 0, 2560, 68174084, 0, 5440, 73, 0, 5440, 73, 0, 5440, 73, 0, 6016, 272696336, 0, 6016, 272696336, 0, 6016, 272696336, 0, 6016, 272696336, 0, 6016, 272696336, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756367339810927729_67_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756367339810927729_67_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f4284935 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756367339810927729_67_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,160 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 22))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 30))) { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 1342177621, 0, 1296, 1342177621, 0, 1296, 1342177621, 0, 1296, 1342177621, 0, 1296, 1342177621, 0, 1296, 1342177621, 0, 1296, 1342177621, 0, 1312, 1342177621, 0, 1312, 1342177621, 0, 1312, 1342177621, 0, 1312, 1342177621, 0, 1312, 1342177621, 0, 1312, 1342177621, 0, 1312, 1342177621, 0, 4672, 64, 0, 4688, 64, 0, 7360, 64, 0, 7376, 64, 0, 7808, 4194304, 0, 7824, 4194304, 0, 1296, 1342177621, 0, 1296, 1342177621, 0, 1296, 1342177621, 0, 1296, 1342177621, 0, 1296, 1342177621, 0, 1296, 1342177621, 0, 1296, 1342177621, 0, 1312, 1342177621, 0, 1312, 1342177621, 0, 1312, 1342177621, 0, 1312, 1342177621, 0, 1312, 1342177621, 0, 1312, 1342177621, 0, 1312, 1342177621, 0, 4672, 64, 0, 4688, 64, 0, 7360, 64, 0, 7376, 64, 0, 7808, 4194304, 0, 7824, 4194304, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756368023695344658_71_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756368023695344658_71_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..89934fc3 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756368023695344658_71_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,229 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 11)) { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || 
(WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 438 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4736, 17, 0, 4736, 17, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6720, 511, 0, 6720, 511, 0, 6720, 511, 0, 6720, 511, 0, 6720, 511, 0, 6720, 511, 0, 6720, 511, 0, 6720, 511, 0, 6720, 511, 0, 9536, 1024, 0, 12304, 682, 0, 12304, 682, 0, 12304, 682, 0, 12304, 682, 0, 12304, 682, 0, 12320, 682, 0, 12320, 682, 0, 12320, 682, 0, 12320, 682, 0, 12320, 682, 0, 12336, 682, 0, 12336, 682, 0, 12336, 682, 0, 12336, 682, 0, 12336, 682, 0, 13008, 682, 0, 13008, 682, 0, 13008, 682, 0, 13008, 
682, 0, 13008, 682, 0, 13024, 682, 0, 13024, 682, 0, 13024, 682, 0, 13024, 682, 0, 13024, 682, 0, 13040, 682, 0, 13040, 682, 0, 13040, 682, 0, 13040, 682, 0, 13040, 682, 0, 4736, 17, 0, 4736, 17, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 5632, 1717986918, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6080, 978670, 0, 6720, 511, 0, 6720, 511, 0, 6720, 511, 0, 6720, 511, 0, 6720, 511, 0, 6720, 511, 0, 6720, 511, 0, 6720, 511, 0, 6720, 511, 0, 9536, 1024, 0, 12304, 682, 0, 12304, 682, 0, 12304, 682, 0, 12304, 682, 0, 12304, 682, 0, 12320, 682, 0, 12320, 682, 0, 12320, 682, 0, 12320, 682, 0, 12320, 682, 0, 12336, 682, 0, 12336, 682, 0, 12336, 682, 0, 12336, 682, 0, 12336, 682, 0, 13008, 682, 0, 13008, 682, 0, 13008, 682, 0, 13008, 682, 0, 13008, 682, 0, 13024, 682, 0, 13024, 682, 0, 13024, 682, 0, 13024, 682, 0, 13024, 682, 0, 13040, 682, 0, 13040, 682, 0, 13040, 682, 0, 13040, 682, 0, 13040, 682, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756368025164646460_72_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756368025164646460_72_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4449d0b0 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756368025164646460_72_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,348 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 13))) { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || 
(WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((93 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((102 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | 
(counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((210 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((217 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((247 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((279 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 1)) { + break; + } + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (301 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (335 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((352 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 2)) { + break; + } + } + } else { + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((371 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (384 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 900 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3280, 286331153, 0, 3280, 286331153, 0, 3280, 286331153, 0, 3280, 286331153, 0, 3280, 286331153, 0, 3280, 286331153, 0, 3280, 286331153, 0, 3280, 286331153, 0, 3284, 286331153, 0, 3284, 286331153, 0, 3284, 286331153, 0, 3284, 286331153, 0, 3284, 286331153, 0, 3284, 286331153, 0, 3284, 286331153, 0, 3284, 286331153, 0, 3296, 286331153, 0, 3296, 286331153, 0, 3296, 286331153, 0, 3296, 286331153, 0, 3296, 286331153, 0, 3296, 286331153, 0, 3296, 286331153, 0, 3296, 286331153, 0, 3300, 286331153, 0, 3300, 286331153, 0, 3300, 286331153, 0, 3300, 286331153, 0, 3300, 286331153, 0, 3300, 286331153, 0, 3300, 286331153, 0, 3300, 286331153, 0, 8192, 536870912, 0, 9344, 536870912, 0, 10512, 536870912, 0, 10528, 536870912, 0, 11200, 536870912, 0, 14336, 559240, 0, 14336, 559240, 0, 14336, 559240, 0, 14336, 559240, 0, 14336, 559240, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 
1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 19264, 85, 0, 19264, 85, 0, 19264, 85, 0, 19264, 85, 0, 22528, 8391170, 0, 22528, 8391170, 0, 22528, 8391170, 0, 22528, 8391170, 0, 22544, 8391170, 0, 22544, 8391170, 0, 22544, 8391170, 0, 22544, 8391170, 0, 22560, 8391170, 0, 22560, 8391170, 0, 22560, 8391170, 0, 22560, 8391170, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 24576, 2854224040, 0, 24576, 2854224040, 0, 24576, 2854224040, 0, 24576, 2854224040, 0, 24576, 2854224040, 0, 24576, 2854224040, 0, 24576, 2854224040, 0, 24576, 2854224040, 0, 3280, 286331153, 0, 3280, 286331153, 0, 3280, 286331153, 0, 3280, 286331153, 0, 3280, 
286331153, 0, 3280, 286331153, 0, 3280, 286331153, 0, 3280, 286331153, 0, 3284, 286331153, 0, 3284, 286331153, 0, 3284, 286331153, 0, 3284, 286331153, 0, 3284, 286331153, 0, 3284, 286331153, 0, 3284, 286331153, 0, 3284, 286331153, 0, 3296, 286331153, 0, 3296, 286331153, 0, 3296, 286331153, 0, 3296, 286331153, 0, 3296, 286331153, 0, 3296, 286331153, 0, 3296, 286331153, 0, 3296, 286331153, 0, 3300, 286331153, 0, 3300, 286331153, 0, 3300, 286331153, 0, 3300, 286331153, 0, 3300, 286331153, 0, 3300, 286331153, 0, 3300, 286331153, 0, 3300, 286331153, 0, 8192, 536870912, 0, 9344, 536870912, 0, 10512, 536870912, 0, 10528, 536870912, 0, 11200, 536870912, 0, 14336, 559240, 0, 14336, 559240, 0, 14336, 559240, 0, 14336, 559240, 0, 14336, 559240, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 15824, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17296, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 17872, 1431655765, 0, 19264, 85, 0, 19264, 85, 0, 19264, 85, 0, 19264, 85, 0, 22528, 8391170, 0, 22528, 8391170, 0, 22528, 8391170, 0, 22528, 8391170, 0, 22544, 8391170, 0, 22544, 8391170, 0, 22544, 8391170, 0, 22544, 
8391170, 0, 22560, 8391170, 0, 22560, 8391170, 0, 22560, 8391170, 0, 22560, 8391170, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23760, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23776, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 23792, 2854920360, 0, 24576, 2854224040, 0, 24576, 2854224040, 0, 24576, 2854224040, 0, 24576, 2854224040, 0, 24576, 2854224040, 0, 24576, 2854224040, 0, 24576, 2854224040, 0, 24576, 2854224040, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756368196710639078_74_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756368196710639078_74_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f2007ea2 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756368196710639078_74_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,311 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 20)) || 
(WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 14))) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + 
break; + } + case 2: { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 20))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((298 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((309 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (313 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 390 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1216, 1, 0, 2304, 268501008, 0, 2304, 268501008, 0, 2304, 268501008, 0, 2320, 268501008, 0, 2320, 268501008, 0, 2320, 268501008, 0, 2336, 268501008, 0, 2336, 268501008, 0, 2336, 268501008, 0, 6784, 18, 0, 6784, 18, 0, 6800, 18, 0, 6800, 18, 0, 6816, 18, 0, 6816, 18, 0, 7232, 301989888, 0, 7232, 301989888, 0, 7248, 301989888, 0, 
7248, 301989888, 0, 7264, 301989888, 0, 7264, 301989888, 0, 7808, 33562626, 0, 7808, 33562626, 0, 7808, 33562626, 0, 7824, 33562626, 0, 7824, 33562626, 0, 7824, 33562626, 0, 7840, 33562626, 0, 7840, 33562626, 0, 7840, 33562626, 0, 10624, 1048576, 0, 10640, 1048576, 0, 10656, 1048576, 0, 13504, 131072, 0, 15040, 537002272, 0, 15040, 537002272, 0, 15040, 537002272, 0, 15040, 537002272, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 576, 17, 0, 576, 17, 0, 1216, 1, 0, 2304, 268501008, 0, 2304, 268501008, 0, 2304, 268501008, 0, 2320, 268501008, 0, 2320, 268501008, 0, 2320, 268501008, 0, 2336, 268501008, 0, 2336, 268501008, 0, 2336, 268501008, 0, 6784, 18, 0, 6784, 18, 0, 6800, 18, 0, 6800, 18, 0, 6816, 18, 0, 6816, 18, 0, 7232, 301989888, 0, 7232, 301989888, 0, 7248, 301989888, 0, 7248, 301989888, 0, 7264, 301989888, 0, 7264, 301989888, 0, 7808, 33562626, 0, 7808, 33562626, 0, 7808, 33562626, 0, 7824, 33562626, 0, 7824, 33562626, 0, 7824, 33562626, 0, 7840, 33562626, 0, 7840, 33562626, 0, 7840, 33562626, 0, 10624, 1048576, 0, 10640, 1048576, 0, 10656, 1048576, 0, 13504, 131072, 0, 15040, 537002272, 0, 15040, 537002272, 0, 15040, 537002272, 0, 15040, 537002272, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 
2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756368202473678331_75_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756368202473678331_75_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cf161daf --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756368202473678331_75_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,92 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 16)) { + result = 
(result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 262144, 0, 912, 262144, 0, 896, 262144, 0, 912, 262144, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756368203250115421_76_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756368203250115421_76_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fa35b665 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756368203250115421_76_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,307 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) 
{ + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 22)) { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) 
| (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 31))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 20))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 18))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 3))) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (286 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 168 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1472, 17, 0, 1472, 17, 0, 5504, 285212672, 0, 5504, 285212672, 0, 10752, 1118481, 0, 10752, 1118481, 0, 10752, 1118481, 0, 10752, 1118481, 0, 10752, 1118481, 0, 10752, 1118481, 0, 11648, 1145324612, 0, 11648, 1145324612, 0, 11648, 1145324612, 0, 11648, 1145324612, 0, 11648, 1145324612, 0, 11648, 1145324612, 0, 11648, 1145324612, 0, 11648, 1145324612, 0, 12096, 559240, 0, 12096, 559240, 0, 12096, 559240, 0, 12096, 559240, 0, 12096, 559240, 0, 12928, 65, 0, 12928, 65, 0, 15296, 272629760, 0, 15296, 272629760, 0, 17728, 4, 0, 1472, 17, 0, 1472, 17, 0, 5504, 285212672, 0, 5504, 285212672, 0, 10752, 1118481, 0, 10752, 1118481, 0, 10752, 1118481, 0, 10752, 1118481, 0, 10752, 1118481, 0, 10752, 1118481, 0, 11648, 1145324612, 0, 11648, 1145324612, 0, 11648, 1145324612, 0, 11648, 1145324612, 0, 11648, 1145324612, 0, 11648, 1145324612, 0, 11648, 1145324612, 0, 11648, 1145324612, 0, 12096, 559240, 0, 12096, 559240, 0, 12096, 559240, 0, 12096, 559240, 0, 12096, 559240, 0, 12928, 65, 0, 12928, 65, 0, 15296, 272629760, 0, 15296, 272629760, 0, 17728, 4, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756368208007789331_77_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756368208007789331_77_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8cc8255f --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756368208007789331_77_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,198 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 13)) { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 18)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 29))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 5461, 0, 768, 5461, 0, 768, 5461, 0, 768, 5461, 0, 768, 5461, 0, 768, 5461, 0, 768, 5461, 0, 5760, 
2056, 0, 5760, 2056, 0, 6720, 2048, 0, 7632, 131072, 0, 7648, 131072, 0, 8080, 16384, 0, 8096, 16384, 0, 768, 5461, 0, 768, 5461, 0, 768, 5461, 0, 768, 5461, 0, 768, 5461, 0, 768, 5461, 0, 768, 5461, 0, 5760, 2056, 0, 5760, 2056, 0, 6720, 2048, 0, 7632, 131072, 0, 7648, 131072, 0, 8080, 16384, 0, 8096, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756368210623940552_78_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756368210623940552_78_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d4ba9131 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756368210623940552_78_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,257 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 28)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if 
(((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 23))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 27)) { + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 5))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = 
(result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2432, 1073741824, 0, 2448, 1073741824, 0, 2464, 1073741824, 0, 4224, 1, 0, 5120, 67108864, 0, 7040, 2101264, 0, 7040, 2101264, 0, 7040, 2101264, 0, 7040, 67108866, 0, 7040, 67108866, 0, 7680, 17, 0, 7680, 17, 0, 2432, 1073741824, 0, 2448, 1073741824, 0, 2464, 1073741824, 0, 4224, 1, 0, 5120, 67108864, 0, 7040, 2101264, 0, 7040, 2101264, 0, 7040, 2101264, 0, 7040, 67108866, 0, 7040, 67108866, 0, 7680, 17, 0, 7680, 17, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756368213346927208_79_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756368213346927208_79_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f6740a89 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756368213346927208_79_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,138 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 
27))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((67 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 906 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1536, 32768, 0, 1552, 32768, 0, 1568, 32768, 0, 3716, 2818572330, 0, 3716, 2818572330, 0, 3716, 2818572330, 0, 3716, 2818572330, 0, 3716, 2818572330, 0, 3716, 2818572330, 0, 3720, 2818572330, 0, 3720, 2818572330, 0, 3720, 2818572330, 0, 3720, 2818572330, 0, 
3720, 2818572330, 0, 3720, 2818572330, 0, 3724, 2818572330, 0, 3724, 2818572330, 0, 3724, 2818572330, 0, 3724, 2818572330, 0, 3724, 2818572330, 0, 3724, 2818572330, 0, 3732, 2818572330, 0, 3732, 2818572330, 0, 3732, 2818572330, 0, 3732, 2818572330, 0, 3732, 2818572330, 0, 3732, 2818572330, 0, 3736, 2818572330, 0, 3736, 2818572330, 0, 3736, 2818572330, 0, 3736, 2818572330, 0, 3736, 2818572330, 0, 3736, 2818572330, 0, 3740, 2818572330, 0, 3740, 2818572330, 0, 3740, 2818572330, 0, 3740, 2818572330, 0, 3740, 2818572330, 0, 3740, 2818572330, 0, 3748, 2818572330, 0, 3748, 2818572330, 0, 3748, 2818572330, 0, 3748, 2818572330, 0, 3748, 2818572330, 0, 3748, 2818572330, 0, 3752, 2818572330, 0, 3752, 2818572330, 0, 3752, 2818572330, 0, 3752, 2818572330, 0, 3752, 2818572330, 0, 3752, 2818572330, 0, 3756, 2818572330, 0, 3756, 2818572330, 0, 3756, 2818572330, 0, 3756, 2818572330, 0, 3756, 2818572330, 0, 3756, 2818572330, 0, 4292, 44739200, 0, 4292, 44739200, 0, 4292, 44739200, 0, 4292, 44739200, 0, 4292, 44739200, 0, 4292, 44739200, 0, 4292, 44739200, 0, 4292, 44739200, 0, 4292, 44739200, 0, 4292, 44739200, 0, 4296, 44739200, 0, 4296, 44739200, 0, 4296, 44739200, 0, 4296, 44739200, 0, 4296, 44739200, 0, 4296, 44739200, 0, 4296, 44739200, 0, 4296, 44739200, 0, 4296, 44739200, 0, 4296, 44739200, 0, 4300, 44739200, 0, 4300, 44739200, 0, 4300, 44739200, 0, 4300, 44739200, 0, 4300, 44739200, 0, 4300, 44739200, 0, 4300, 44739200, 0, 4300, 44739200, 0, 4300, 44739200, 0, 4300, 44739200, 0, 4308, 44739200, 0, 4308, 44739200, 0, 4308, 44739200, 0, 4308, 44739200, 0, 4308, 44739200, 0, 4308, 44739200, 0, 4308, 44739200, 0, 4308, 44739200, 0, 4308, 44739200, 0, 4308, 44739200, 0, 4312, 44739200, 0, 4312, 44739200, 0, 4312, 44739200, 0, 4312, 44739200, 0, 4312, 44739200, 0, 4312, 44739200, 0, 4312, 44739200, 0, 4312, 44739200, 0, 4312, 44739200, 0, 4312, 44739200, 0, 4316, 44739200, 0, 4316, 44739200, 0, 4316, 44739200, 0, 4316, 44739200, 0, 4316, 44739200, 0, 4316, 44739200, 0, 4316, 
44739200, 0, 4316, 44739200, 0, 4316, 44739200, 0, 4316, 44739200, 0, 4324, 44739200, 0, 4324, 44739200, 0, 4324, 44739200, 0, 4324, 44739200, 0, 4324, 44739200, 0, 4324, 44739200, 0, 4324, 44739200, 0, 4324, 44739200, 0, 4324, 44739200, 0, 4324, 44739200, 0, 4328, 44739200, 0, 4328, 44739200, 0, 4328, 44739200, 0, 4328, 44739200, 0, 4328, 44739200, 0, 4328, 44739200, 0, 4328, 44739200, 0, 4328, 44739200, 0, 4328, 44739200, 0, 4328, 44739200, 0, 4332, 44739200, 0, 4332, 44739200, 0, 4332, 44739200, 0, 4332, 44739200, 0, 4332, 44739200, 0, 4332, 44739200, 0, 4332, 44739200, 0, 4332, 44739200, 0, 4332, 44739200, 0, 4332, 44739200, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1536, 32768, 0, 1552, 32768, 0, 1568, 32768, 0, 3716, 2818572330, 0, 3716, 2818572330, 0, 3716, 2818572330, 0, 3716, 2818572330, 0, 3716, 2818572330, 0, 3716, 2818572330, 0, 3720, 2818572330, 0, 3720, 2818572330, 0, 3720, 2818572330, 0, 3720, 2818572330, 0, 3720, 2818572330, 0, 3720, 2818572330, 0, 3724, 2818572330, 0, 3724, 2818572330, 0, 3724, 2818572330, 0, 3724, 2818572330, 0, 3724, 2818572330, 0, 3724, 2818572330, 0, 3732, 2818572330, 0, 3732, 2818572330, 0, 3732, 2818572330, 0, 3732, 2818572330, 0, 3732, 2818572330, 0, 3732, 2818572330, 0, 3736, 2818572330, 0, 3736, 2818572330, 0, 3736, 2818572330, 0, 3736, 2818572330, 0, 3736, 2818572330, 0, 3736, 2818572330, 0, 3740, 2818572330, 0, 3740, 2818572330, 0, 3740, 2818572330, 0, 3740, 2818572330, 0, 3740, 2818572330, 0, 3740, 2818572330, 0, 3748, 2818572330, 0, 3748, 2818572330, 0, 3748, 2818572330, 0, 3748, 2818572330, 0, 3748, 2818572330, 0, 3748, 2818572330, 0, 3752, 2818572330, 0, 3752, 2818572330, 0, 3752, 2818572330, 0, 3752, 2818572330, 0, 3752, 2818572330, 0, 3752, 2818572330, 0, 3756, 2818572330, 0, 3756, 2818572330, 0, 3756, 2818572330, 0, 3756, 2818572330, 0, 3756, 2818572330, 0, 3756, 2818572330, 0, 4292, 44739200, 0, 4292, 44739200, 0, 4292, 44739200, 0, 4292, 44739200, 0, 4292, 44739200, 0, 4292, 44739200, 0, 4292, 44739200, 
0, 4292, 44739200, 0, 4292, 44739200, 0, 4292, 44739200, 0, 4296, 44739200, 0, 4296, 44739200, 0, 4296, 44739200, 0, 4296, 44739200, 0, 4296, 44739200, 0, 4296, 44739200, 0, 4296, 44739200, 0, 4296, 44739200, 0, 4296, 44739200, 0, 4296, 44739200, 0, 4300, 44739200, 0, 4300, 44739200, 0, 4300, 44739200, 0, 4300, 44739200, 0, 4300, 44739200, 0, 4300, 44739200, 0, 4300, 44739200, 0, 4300, 44739200, 0, 4300, 44739200, 0, 4300, 44739200, 0, 4308, 44739200, 0, 4308, 44739200, 0, 4308, 44739200, 0, 4308, 44739200, 0, 4308, 44739200, 0, 4308, 44739200, 0, 4308, 44739200, 0, 4308, 44739200, 0, 4308, 44739200, 0, 4308, 44739200, 0, 4312, 44739200, 0, 4312, 44739200, 0, 4312, 44739200, 0, 4312, 44739200, 0, 4312, 44739200, 0, 4312, 44739200, 0, 4312, 44739200, 0, 4312, 44739200, 0, 4312, 44739200, 0, 4312, 44739200, 0, 4316, 44739200, 0, 4316, 44739200, 0, 4316, 44739200, 0, 4316, 44739200, 0, 4316, 44739200, 0, 4316, 44739200, 0, 4316, 44739200, 0, 4316, 44739200, 0, 4316, 44739200, 0, 4316, 44739200, 0, 4324, 44739200, 0, 4324, 44739200, 0, 4324, 44739200, 0, 4324, 44739200, 0, 4324, 44739200, 0, 4324, 44739200, 0, 4324, 44739200, 0, 4324, 44739200, 0, 4324, 44739200, 0, 4324, 44739200, 0, 4328, 44739200, 0, 4328, 44739200, 0, 4328, 44739200, 0, 4328, 44739200, 0, 4328, 44739200, 0, 4328, 44739200, 0, 4328, 44739200, 0, 4328, 44739200, 0, 4328, 44739200, 0, 4328, 44739200, 0, 4332, 44739200, 0, 4332, 44739200, 0, 4332, 44739200, 0, 4332, 44739200, 0, 4332, 44739200, 0, 4332, 44739200, 0, 4332, 44739200, 0, 4332, 44739200, 0, 4332, 44739200, 0, 4332, 44739200, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756368224980659849_80_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756368224980659849_80_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1bf20486 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756368224980659849_80_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,128 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 27))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 21))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756368225138344756_81_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756368225138344756_81_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d7455893 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756368225138344756_81_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,88 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 23)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 28))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26))) { + result = (result 
+ WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 8388608, 0, 576, 8388608, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756368225257949708_82_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756368225257949708_82_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8e472221 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756368225257949708_82_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,591 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29))) { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 17))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((243 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((262 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((269 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((296 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((306 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((319 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((345 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((368 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (373 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (383 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((413 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((424 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (434 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (439 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (449 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (458 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (470 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (477 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (481 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (491 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((511 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((521 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if ((i6 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (533 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + uint counter8 = 0; + while ((counter8 < 3)) { + counter8 = (counter8 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((563 << 6) | (i7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((570 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (575 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (579 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 
1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 432 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1168, 286331153, 0, 1168, 286331153, 0, 1168, 286331153, 0, 1168, 286331153, 0, 1168, 286331153, 0, 1168, 286331153, 0, 1168, 286331153, 0, 1168, 286331153, 0, 5840, 286331153, 0, 5840, 286331153, 0, 5840, 286331153, 0, 5840, 286331153, 0, 5840, 286331153, 0, 5840, 286331153, 0, 5840, 286331153, 0, 5840, 286331153, 0, 17536, 1145324612, 0, 17536, 1145324612, 0, 17536, 1145324612, 0, 17536, 1145324612, 0, 17536, 1145324612, 0, 17536, 1145324612, 0, 17536, 1145324612, 0, 17536, 1145324612, 0, 20416, 134217728, 0, 20432, 134217728, 0, 22096, 524288, 0, 22112, 524288, 0, 23568, 128, 0, 23584, 128, 0, 23872, 8390656, 0, 23872, 8390656, 0, 24512, 17, 0, 24512, 17, 0, 28096, 1145324612, 0, 28096, 1145324612, 0, 28096, 1145324612, 0, 28096, 1145324612, 0, 28096, 1145324612, 0, 28096, 1145324612, 0, 28096, 1145324612, 0, 28096, 1145324612, 0, 28736, 8, 0, 31424, 73, 0, 31424, 73, 0, 31424, 73, 0, 34112, 272696336, 0, 34112, 272696336, 0, 34112, 272696336, 0, 34112, 272696336, 0, 34112, 272696336, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 1168, 286331153, 0, 1168, 286331153, 0, 1168, 286331153, 0, 1168, 286331153, 0, 1168, 286331153, 0, 1168, 286331153, 0, 1168, 286331153, 0, 1168, 286331153, 0, 5840, 286331153, 0, 5840, 286331153, 0, 5840, 286331153, 0, 5840, 286331153, 0, 5840, 286331153, 0, 5840, 286331153, 0, 5840, 286331153, 0, 5840, 286331153, 0, 17536, 
1145324612, 0, 17536, 1145324612, 0, 17536, 1145324612, 0, 17536, 1145324612, 0, 17536, 1145324612, 0, 17536, 1145324612, 0, 17536, 1145324612, 0, 17536, 1145324612, 0, 20416, 134217728, 0, 20432, 134217728, 0, 22096, 524288, 0, 22112, 524288, 0, 23568, 128, 0, 23584, 128, 0, 23872, 8390656, 0, 23872, 8390656, 0, 24512, 17, 0, 24512, 17, 0, 28096, 1145324612, 0, 28096, 1145324612, 0, 28096, 1145324612, 0, 28096, 1145324612, 0, 28096, 1145324612, 0, 28096, 1145324612, 0, 28096, 1145324612, 0, 28096, 1145324612, 0, 28736, 8, 0, 31424, 73, 0, 31424, 73, 0, 31424, 73, 0, 34112, 272696336, 0, 34112, 272696336, 0, 34112, 272696336, 0, 34112, 272696336, 0, 34112, 272696336, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0, 36800, 3067833782, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756368294952855479_83_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756368294952855479_83_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d5875b27 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756368294952855479_83_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,293 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 0))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 25))) { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 22))) 
{ + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((151 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter3 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter4 = 
0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((243 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 294 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4544, 1, 0, 9684, 16777217, 0, 9684, 16777217, 0, 9688, 16777217, 0, 9688, 16777217, 0, 9700, 16777217, 0, 9700, 16777217, 0, 9704, 16777217, 0, 9704, 16777217, 0, 10516, 16777217, 0, 10516, 16777217, 0, 10520, 16777217, 0, 10520, 16777217, 0, 10532, 16777217, 0, 10532, 16777217, 0, 10536, 16777217, 0, 10536, 16777217, 0, 11088, 1, 0, 11104, 1, 0, 13184, 1074004032, 0, 13184, 1074004032, 0, 13184, 1074004032, 0, 15568, 134217736, 0, 15568, 134217736, 0, 15584, 134217736, 0, 15584, 134217736, 0, 16272, 134217728, 0, 16288, 134217728, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17728, 613566756, 0, 17728, 613566756, 0, 17728, 613566756, 0, 17728, 613566756, 0, 17728, 613566756, 0, 17728, 613566756, 0, 17728, 613566756, 0, 17728, 613566756, 0, 17728, 613566756, 0, 17728, 613566756, 0, 4544, 1, 0, 9684, 16777217, 0, 9684, 16777217, 0, 9688, 16777217, 0, 9688, 16777217, 0, 9700, 16777217, 0, 9700, 16777217, 0, 9704, 16777217, 0, 9704, 16777217, 0, 10516, 
16777217, 0, 10516, 16777217, 0, 10520, 16777217, 0, 10520, 16777217, 0, 10532, 16777217, 0, 10532, 16777217, 0, 10536, 16777217, 0, 10536, 16777217, 0, 11088, 1, 0, 11104, 1, 0, 13184, 1074004032, 0, 13184, 1074004032, 0, 13184, 1074004032, 0, 15568, 134217736, 0, 15568, 134217736, 0, 15584, 134217736, 0, 15584, 134217736, 0, 16272, 134217728, 0, 16288, 134217728, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17408, 1363481681, 0, 17728, 613566756, 0, 17728, 613566756, 0, 17728, 613566756, 0, 17728, 613566756, 0, 17728, 613566756, 0, 17728, 613566756, 0, 17728, 613566756, 0, 17728, 613566756, 0, 17728, 613566756, 0, 17728, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756368303182426282_84_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756368303182426282_84_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4502c941 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756368303182426282_84_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,136 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 30))) { + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | 
(counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2256, 8388616, 0, 2256, 8388616, 0, 2272, 8388616, 0, 2272, 8388616, 
0, 3920, 131072, 0, 3936, 131072, 0, 5328, 8388608, 0, 5344, 8388608, 0, 6288, 8913408, 0, 6288, 8913408, 0, 6288, 8913408, 0, 6304, 8913408, 0, 6304, 8913408, 0, 6304, 8913408, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2256, 8388616, 0, 2256, 8388616, 0, 2272, 8388616, 0, 2272, 8388616, 0, 3920, 131072, 0, 3936, 131072, 0, 5328, 8388608, 0, 5344, 8388608, 0, 6288, 8913408, 0, 6288, 8913408, 0, 6288, 8913408, 0, 6304, 8913408, 0, 6304, 8913408, 0, 6304, 8913408, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756368304198153466_85_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756368304198153466_85_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..72c35718 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756368304198153466_85_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,155 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 21))) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 276 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 
2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 6544, 2130048, 0, 6544, 2130048, 0, 6544, 2130048, 0, 6560, 2130048, 0, 6560, 2130048, 0, 6560, 2130048, 0, 7360, 85, 0, 7360, 85, 0, 7360, 85, 0, 7360, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 6544, 2130048, 0, 6544, 2130048, 0, 6544, 2130048, 0, 6560, 2130048, 0, 6560, 2130048, 0, 6560, 2130048, 0, 7360, 85, 0, 7360, 85, 0, 7360, 85, 0, 7360, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369236583296590_88_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369236583296590_88_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3c89dbbd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369236583296590_88_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,576 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 18)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if 
(((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + } else { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 9)) { + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((264 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((290 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((308 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + } + break; + } + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 27))) { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 25))) { + 
result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (333 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((364 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((383 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((394 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (413 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else 
{ + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (424 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (442 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((461 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (475 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((494 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((508 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((517 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((526 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i9 = 0; (i9 < 2); i9 = (i9 + 1)) { + uint counter10 = 0; + while ((counter10 < 3)) { + counter10 = (counter10 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((552 << 6) | (i9 << 4)) | (counter10 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((561 << 6) | (i9 << 4)) | (counter10 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || 
(WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((572 << 6) | (i9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i9 == 1)) { + break; + } + } + break; + } + case 2: { + uint counter11 = 0; + while ((counter11 < 3)) { + counter11 = (counter11 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((593 << 6) | (counter11 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((611 << 6) | (counter11 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((622 << 6) | (counter11 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (629 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 810 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4096, 1, 0, 4928, 16777217, 0, 4928, 16777217, 0, 5504, 268501008, 0, 5504, 268501008, 0, 5504, 268501008, 0, 10944, 1073741824, 0, 10960, 1073741824, 0, 15376, 1024, 0, 15392, 1024, 0, 19732, 67108868, 0, 19732, 67108868, 0, 19736, 67108868, 0, 19736, 67108868, 0, 19748, 67108868, 0, 19748, 67108868, 0, 19752, 67108868, 0, 19752, 67108868, 0, 19764, 67108868, 0, 19764, 67108868, 0, 19768, 67108868, 0, 19768, 67108868, 0, 24512, 134217728, 0, 24516, 134217728, 0, 24528, 134217728, 0, 24532, 134217728, 0, 25216, 134217728, 0, 25220, 134217728, 0, 25232, 134217728, 0, 25236, 134217728, 0, 27136, 2147483648, 0, 28288, 2147483656, 0, 28288, 2147483656, 0, 29504, 2147483656, 0, 29504, 2147483656, 0, 29520, 2147483656, 0, 29520, 2147483656, 0, 29536, 2147483656, 0, 29536, 2147483656, 0, 30400, 2147483648, 0, 31632, 286331153, 0, 31632, 286331153, 0, 31632, 286331153, 0, 31632, 286331153, 0, 31632, 286331153, 0, 31632, 286331153, 0, 31632, 286331153, 0, 31632, 286331153, 0, 31648, 286331153, 0, 31648, 286331153, 0, 31648, 286331153, 0, 31648, 286331153, 0, 31648, 286331153, 0, 31648, 286331153, 0, 31648, 286331153, 0, 31648, 286331153, 0, 35908, 572662306, 0, 35908, 572662306, 0, 35908, 572662306, 0, 35908, 572662306, 0, 35908, 572662306, 0, 35908, 572662306, 0, 35908, 572662306, 0, 35908, 572662306, 0, 35912, 572662306, 0, 35912, 572662306, 0, 35912, 572662306, 0, 35912, 572662306, 0, 35912, 572662306, 0, 35912, 572662306, 0, 35912, 572662306, 0, 35912, 572662306, 0, 35916, 572662306, 0, 35916, 572662306, 0, 35916, 572662306, 0, 35916, 572662306, 0, 35916, 572662306, 0, 35916, 572662306, 0, 35916, 572662306, 0, 35916, 572662306, 0, 35924, 572662306, 0, 35924, 572662306, 0, 35924, 572662306, 0, 
35924, 572662306, 0, 35924, 572662306, 0, 35924, 572662306, 0, 35924, 572662306, 0, 35924, 572662306, 0, 35928, 572662306, 0, 35928, 572662306, 0, 35928, 572662306, 0, 35928, 572662306, 0, 35928, 572662306, 0, 35928, 572662306, 0, 35928, 572662306, 0, 35928, 572662306, 0, 35932, 572662306, 0, 35932, 572662306, 0, 35932, 572662306, 0, 35932, 572662306, 0, 35932, 572662306, 0, 35932, 572662306, 0, 35932, 572662306, 0, 35932, 572662306, 0, 36608, 2, 0, 36624, 2, 0, 37968, 1140850688, 0, 37968, 1140850688, 0, 37984, 1140850688, 0, 37984, 1140850688, 0, 38000, 1140850688, 0, 38000, 1140850688, 0, 39120, 1140850692, 0, 39120, 1140850692, 0, 39120, 1140850692, 0, 39136, 1140850692, 0, 39136, 1140850692, 0, 39136, 1140850692, 0, 39152, 1140850692, 0, 39152, 1140850692, 0, 39152, 1140850692, 0, 39824, 1140850692, 0, 39824, 1140850692, 0, 39824, 1140850692, 0, 39840, 1140850692, 0, 39840, 1140850692, 0, 39840, 1140850692, 0, 39856, 1140850692, 0, 39856, 1140850692, 0, 39856, 1140850692, 0, 40256, 559240, 0, 40256, 559240, 0, 40256, 559240, 0, 40256, 559240, 0, 40256, 559240, 0, 4096, 1, 0, 4928, 16777217, 0, 4928, 16777217, 0, 5504, 268501008, 0, 5504, 268501008, 0, 5504, 268501008, 0, 10944, 1073741824, 0, 10960, 1073741824, 0, 15376, 1024, 0, 15392, 1024, 0, 19732, 67108868, 0, 19732, 67108868, 0, 19736, 67108868, 0, 19736, 67108868, 0, 19748, 67108868, 0, 19748, 67108868, 0, 19752, 67108868, 0, 19752, 67108868, 0, 19764, 67108868, 0, 19764, 67108868, 0, 19768, 67108868, 0, 19768, 67108868, 0, 24512, 134217728, 0, 24516, 134217728, 0, 24528, 134217728, 0, 24532, 134217728, 0, 25216, 134217728, 0, 25220, 134217728, 0, 25232, 134217728, 0, 25236, 134217728, 0, 27136, 2147483648, 0, 28288, 2147483656, 0, 28288, 2147483656, 0, 29504, 2147483656, 0, 29504, 2147483656, 0, 29520, 2147483656, 0, 29520, 2147483656, 0, 29536, 2147483656, 0, 29536, 2147483656, 0, 30400, 2147483648, 0, 31632, 286331153, 0, 31632, 286331153, 0, 31632, 286331153, 0, 31632, 286331153, 0, 31632, 
286331153, 0, 31632, 286331153, 0, 31632, 286331153, 0, 31632, 286331153, 0, 31648, 286331153, 0, 31648, 286331153, 0, 31648, 286331153, 0, 31648, 286331153, 0, 31648, 286331153, 0, 31648, 286331153, 0, 31648, 286331153, 0, 31648, 286331153, 0, 35908, 572662306, 0, 35908, 572662306, 0, 35908, 572662306, 0, 35908, 572662306, 0, 35908, 572662306, 0, 35908, 572662306, 0, 35908, 572662306, 0, 35908, 572662306, 0, 35912, 572662306, 0, 35912, 572662306, 0, 35912, 572662306, 0, 35912, 572662306, 0, 35912, 572662306, 0, 35912, 572662306, 0, 35912, 572662306, 0, 35912, 572662306, 0, 35916, 572662306, 0, 35916, 572662306, 0, 35916, 572662306, 0, 35916, 572662306, 0, 35916, 572662306, 0, 35916, 572662306, 0, 35916, 572662306, 0, 35916, 572662306, 0, 35924, 572662306, 0, 35924, 572662306, 0, 35924, 572662306, 0, 35924, 572662306, 0, 35924, 572662306, 0, 35924, 572662306, 0, 35924, 572662306, 0, 35924, 572662306, 0, 35928, 572662306, 0, 35928, 572662306, 0, 35928, 572662306, 0, 35928, 572662306, 0, 35928, 572662306, 0, 35928, 572662306, 0, 35928, 572662306, 0, 35928, 572662306, 0, 35932, 572662306, 0, 35932, 572662306, 0, 35932, 572662306, 0, 35932, 572662306, 0, 35932, 572662306, 0, 35932, 572662306, 0, 35932, 572662306, 0, 35932, 572662306, 0, 36608, 2, 0, 36624, 2, 0, 37968, 1140850688, 0, 37968, 1140850688, 0, 37984, 1140850688, 0, 37984, 1140850688, 0, 38000, 1140850688, 0, 38000, 1140850688, 0, 39120, 1140850692, 0, 39120, 1140850692, 0, 39120, 1140850692, 0, 39136, 1140850692, 0, 39136, 1140850692, 0, 39136, 1140850692, 0, 39152, 1140850692, 0, 39152, 1140850692, 0, 39152, 1140850692, 0, 39824, 1140850692, 0, 39824, 1140850692, 0, 39824, 1140850692, 0, 39840, 1140850692, 0, 39840, 1140850692, 0, 39840, 1140850692, 0, 39856, 1140850692, 0, 39856, 1140850692, 0, 39856, 1140850692, 0, 40256, 559240, 0, 40256, 559240, 0, 40256, 559240, 0, 40256, 559240, 0, 40256, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369394212248425_89_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369394212248425_89_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0c574e58 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369394212248425_89_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,292 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + 
result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 20))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 450 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 5440, 256, 0, 6016, 68173828, 0, 6016, 68173828, 0, 6016, 68173828, 0, 6016, 68173828, 0, 6592, 545392672, 0, 6592, 545392672, 0, 6592, 545392672, 0, 6592, 545392672, 0, 6592, 545392672, 0, 8128, 8, 0, 9024, 545392672, 0, 9024, 545392672, 0, 9024, 545392672, 0, 9024, 545392672, 0, 9024, 545392672, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 11728, 268502032, 0, 11728, 268502032, 0, 11728, 268502032, 0, 11728, 268502032, 0, 11744, 268502032, 0, 11744, 268502032, 0, 11744, 268502032, 0, 11744, 268502032, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 
1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 15888, 268435456, 0, 15904, 268435456, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 5440, 256, 0, 6016, 68173828, 0, 6016, 68173828, 0, 6016, 68173828, 0, 6016, 68173828, 0, 6592, 545392672, 0, 6592, 545392672, 0, 6592, 545392672, 0, 6592, 545392672, 0, 6592, 545392672, 0, 8128, 8, 0, 9024, 545392672, 0, 9024, 545392672, 0, 9024, 545392672, 0, 9024, 545392672, 0, 9024, 545392672, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 9600, 2863311530, 0, 11728, 268502032, 0, 11728, 268502032, 0, 11728, 268502032, 0, 11728, 268502032, 0, 11744, 268502032, 0, 11744, 268502032, 0, 11744, 268502032, 0, 11744, 268502032, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14928, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 14944, 1430607189, 0, 15888, 268435456, 0, 15904, 268435456, 0] + - Name: _wave_op_index 
+ Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369410881165696_90_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369410881165696_90_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..13795820 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369410881165696_90_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,146 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1920, 4026531855, 0, 1920, 4026531855, 0, 1920, 4026531855, 0, 1920, 4026531855, 0, 1920, 4026531855, 0, 1920, 4026531855, 0, 1920, 4026531855, 0, 1920, 4026531855, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1280, 89456640, 0, 1280, 89456640, 0, 1280, 89456640, 0, 1280, 89456640, 0, 1280, 89456640, 0, 1280, 89456640, 0, 2560, 17, 0, 2560, 17, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3904, 559240, 0, 3904, 559240, 0, 3904, 559240, 0, 3904, 559240, 0, 3904, 559240, 0, 1920, 4026531855, 0, 1920, 4026531855, 0, 1920, 4026531855, 0, 1920, 4026531855, 0, 1920, 4026531855, 0, 1920, 4026531855, 0, 1920, 4026531855, 0, 1920, 4026531855, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1536, 65520, 0, 1280, 89456640, 0, 1280, 89456640, 0, 1280, 89456640, 0, 1280, 89456640, 0, 1280, 89456640, 0, 1280, 89456640, 0, 2560, 17, 0, 2560, 17, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3904, 559240, 0, 3904, 559240, 0, 3904, 559240, 0, 3904, 559240, 0, 3904, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit 
+ Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369421598381092_94_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369421598381092_94_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..820d8152 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369421598381092_94_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,241 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 27))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 12)) { + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2368, 268501008, 0, 2368, 268501008, 0, 2368, 268501008, 0, 5632, 256, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6976, 559240, 0, 6976, 559240, 0, 6976, 559240, 0, 6976, 559240, 0, 6976, 559240, 0, 8448, 85, 0, 8448, 85, 0, 8448, 85, 0, 8448, 85, 0, 9664, 85, 0, 9664, 85, 0, 9664, 85, 0, 9664, 85, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 2368, 268501008, 0, 2368, 268501008, 0, 2368, 268501008, 0, 5632, 256, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6528, 1145324612, 0, 6976, 559240, 0, 6976, 559240, 0, 6976, 559240, 0, 6976, 559240, 0, 6976, 559240, 0, 8448, 85, 0, 8448, 85, 0, 8448, 85, 0, 8448, 85, 0, 9664, 85, 0, 9664, 85, 0, 9664, 85, 0, 9664, 85, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 
1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0, 10240, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369424066230773_95_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369424066230773_95_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..325e39a6 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369424066230773_95_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,305 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 
22))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; 
(i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((266 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 28)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 16))) { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((294 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((301 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((308 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 294 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7744, 136348168, 0, 7744, 136348168, 0, 7744, 136348168, 0, 7744, 136348168, 0, 7744, 136348168, 0, 8384, 65, 0, 8384, 65, 0, 14016, 1, 0, 14592, 272696336, 0, 14592, 272696336, 0, 14592, 272696336, 0, 14592, 272696336, 0, 14592, 272696336, 0, 14912, 613566756, 0, 14912, 613566756, 0, 14912, 613566756, 0, 14912, 613566756, 0, 14912, 613566756, 0, 14912, 613566756, 0, 14912, 613566756, 0, 14912, 613566756, 0, 14912, 613566756, 0, 14912, 613566756, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 17024, 1426063360, 0, 17024, 1426063360, 0, 17024, 1426063360, 0, 17024, 1426063360, 0, 17040, 1426063360, 0, 17040, 1426063360, 0, 17040, 1426063360, 0, 17040, 1426063360, 0, 19712, 1, 0, 19728, 1, 0, 7744, 136348168, 0, 7744, 136348168, 0, 7744, 136348168, 0, 7744, 136348168, 0, 7744, 136348168, 0, 8384, 65, 0, 8384, 65, 0, 14016, 1, 0, 14592, 272696336, 0, 14592, 272696336, 0, 14592, 272696336, 0, 14592, 272696336, 0, 14592, 272696336, 0, 14912, 613566756, 0, 14912, 613566756, 0, 14912, 613566756, 0, 14912, 613566756, 0, 14912, 613566756, 0, 14912, 613566756, 0, 14912, 613566756, 0, 14912, 613566756, 0, 14912, 613566756, 0, 14912, 613566756, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 
1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 16064, 1431655765, 0, 17024, 1426063360, 0, 17024, 1426063360, 0, 17024, 1426063360, 0, 17024, 1426063360, 0, 17040, 1426063360, 0, 17040, 1426063360, 0, 17040, 1426063360, 0, 17040, 1426063360, 0, 19712, 1, 0, 19728, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369425889823931_96_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369425889823931_96_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f5cbdb8f --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369425889823931_96_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,123 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 31))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((68 << 6) | (counter0 << 4)) | (i1 << 2)) | counter2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((87 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 24)) { + result = 
(result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1616, 2147615760, 0, 1616, 2147615760, 0, 1616, 2147615760, 0, 1616, 2147615760, 0, 1632, 2147615760, 0, 1632, 2147615760, 0, 1632, 2147615760, 0, 1632, 2147615760, 0, 1616, 2147615760, 0, 1616, 2147615760, 0, 1616, 2147615760, 0, 1616, 2147615760, 0, 1632, 2147615760, 0, 1632, 2147615760, 0, 1632, 2147615760, 0, 1632, 2147615760, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369459549071271_98_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369459549071271_98_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..500da1e8 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369459549071271_98_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,277 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 15)) { + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + 
result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 
3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] 
# Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 492 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1664, 146, 0, 1664, 146, 0, 1664, 146, 0, 1680, 146, 0, 1680, 146, 0, 1680, 146, 0, 1696, 146, 0, 1696, 146, 0, 1696, 146, 0, 3072, 16, 0, 3088, 16, 0, 3104, 16, 0, 4352, 146, 0, 4352, 146, 0, 4352, 146, 0, 4368, 146, 0, 4368, 146, 0, 4368, 146, 0, 4384, 146, 0, 4384, 146, 0, 4384, 146, 0, 7236, 4194304, 0, 7240, 4194304, 0, 7252, 4194304, 0, 7256, 4194304, 0, 7268, 4194304, 0, 7272, 4194304, 0, 7744, 613566756, 0, 7744, 613566756, 0, 7744, 613566756, 0, 7744, 613566756, 0, 7744, 613566756, 0, 7744, 613566756, 0, 7744, 613566756, 0, 7744, 613566756, 0, 7744, 613566756, 0, 7744, 613566756, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9728, 1397760, 0, 9728, 1397760, 0, 9728, 1397760, 0, 9728, 1397760, 0, 9728, 1397760, 0, 9728, 1397760, 0, 13056, 262144, 0, 13632, 332800, 0, 13632, 332800, 0, 13632, 332800, 0, 13632, 332800, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 14784, 698880, 0, 14784, 698880, 0, 14784, 698880, 0, 14784, 698880, 0, 14784, 698880, 0, 14784, 698880, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1664, 146, 0, 1664, 146, 0, 1664, 146, 0, 1680, 146, 0, 1680, 146, 0, 1680, 146, 0, 1696, 146, 0, 1696, 146, 0, 1696, 146, 0, 3072, 16, 0, 3088, 16, 0, 3104, 16, 0, 4352, 146, 0, 4352, 146, 0, 4352, 146, 0, 4368, 146, 0, 4368, 146, 0, 4368, 146, 0, 4384, 146, 0, 4384, 146, 0, 4384, 146, 0, 7236, 4194304, 0, 7240, 4194304, 0, 
7252, 4194304, 0, 7256, 4194304, 0, 7268, 4194304, 0, 7272, 4194304, 0, 7744, 613566756, 0, 7744, 613566756, 0, 7744, 613566756, 0, 7744, 613566756, 0, 7744, 613566756, 0, 7744, 613566756, 0, 7744, 613566756, 0, 7744, 613566756, 0, 7744, 613566756, 0, 7744, 613566756, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9152, 4026532351, 0, 9728, 1397760, 0, 9728, 1397760, 0, 9728, 1397760, 0, 9728, 1397760, 0, 9728, 1397760, 0, 9728, 1397760, 0, 13056, 262144, 0, 13632, 332800, 0, 13632, 332800, 0, 13632, 332800, 0, 13632, 332800, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 13952, 2096640, 0, 14784, 698880, 0, 14784, 698880, 0, 14784, 698880, 0, 14784, 698880, 0, 14784, 698880, 0, 14784, 698880, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369489612640292_99_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369489612640292_99_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..32190cf8 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369489612640292_99_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,398 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 24))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20))) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 
0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((205 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 21))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) 
|| (WaveGetLaneIndex() == 25))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((324 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((340 << 6) | (counter6 << 4)) | (counter7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((347 << 6) | (counter6 << 4)) | 
(counter7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((358 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (368 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (377 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (382 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 174 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1872, 8, 0, 1888, 8, 0, 1904, 8, 0, 2900, 16777216, 0, 2904, 16777216, 0, 2908, 16777216, 0, 2916, 
16777216, 0, 2920, 16777216, 0, 2924, 16777216, 0, 2932, 16777216, 0, 2936, 16777216, 0, 2940, 16777216, 0, 7488, 16, 0, 8128, 16, 0, 9792, 4194304, 0, 9808, 4194304, 0, 10496, 4194304, 0, 10512, 4194304, 0, 10944, 524416, 0, 10944, 524416, 0, 16192, 32, 0, 20752, 67108868, 0, 20752, 67108868, 0, 20768, 67108868, 0, 20768, 67108868, 0, 22928, 4, 0, 22944, 4, 0, 24448, 8390656, 0, 24448, 8390656, 0, 1872, 8, 0, 1888, 8, 0, 1904, 8, 0, 2900, 16777216, 0, 2904, 16777216, 0, 2908, 16777216, 0, 2916, 16777216, 0, 2920, 16777216, 0, 2924, 16777216, 0, 2932, 16777216, 0, 2936, 16777216, 0, 2940, 16777216, 0, 7488, 16, 0, 8128, 16, 0, 9792, 4194304, 0, 9808, 4194304, 0, 10496, 4194304, 0, 10512, 4194304, 0, 10944, 524416, 0, 10944, 524416, 0, 16192, 32, 0, 20752, 67108868, 0, 20752, 67108868, 0, 20768, 67108868, 0, 20768, 67108868, 0, 22928, 4, 0, 22944, 4, 0, 24448, 8390656, 0, 24448, 8390656, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369522162963212_100_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369522162963212_100_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d122ff6c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369522162963212_100_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,129 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2048, 85, 0, 2048, 85, 0, 2048, 85, 0, 2048, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2048, 85, 0, 2048, 85, 0, 2048, 85, 0, 2048, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369592115655912_102_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369592115655912_102_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1b097208 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369592115655912_102_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,186 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 22))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27))) { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((141 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 29)) { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((151 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((158 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3136, 4, 0, 3152, 4, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 6464, 272696336, 0, 6464, 272696336, 0, 6464, 272696336, 0, 6464, 272696336, 0, 6464, 272696336, 0, 9024, 536870916, 0, 9024, 536870916, 0, 9028, 536870916, 0, 9028, 536870916, 0, 9040, 536870916, 0, 9040, 536870916, 0, 
9044, 536870916, 0, 9044, 536870916, 0, 9056, 536870916, 0, 9056, 536870916, 0, 9060, 536870916, 0, 9060, 536870916, 0, 2176, 134217728, 0, 3136, 4, 0, 3152, 4, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 4544, 4261412879, 0, 6464, 272696336, 0, 6464, 272696336, 0, 6464, 272696336, 0, 6464, 272696336, 0, 6464, 272696336, 0, 9024, 536870916, 0, 9024, 536870916, 0, 9028, 536870916, 0, 9028, 536870916, 0, 9040, 536870916, 0, 9040, 536870916, 0, 9044, 536870916, 0, 9044, 536870916, 0, 9056, 536870916, 0, 9056, 536870916, 0, 9060, 536870916, 0, 9060, 536870916, 0, 2176, 134217728, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369599990488304_103_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369599990488304_103_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8cc00afb --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369599990488304_103_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,289 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + if ((counter1 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 28))) { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((195 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 23))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (350 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (344 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [13632, 268435456, 0, 13648, 268435456, 0, 15168, 65, 0, 15168, 65, 0, 15744, 1095046225, 0, 15744, 1095046225, 0, 15744, 1095046225, 0, 15744, 1095046225, 0, 15744, 1095046225, 0, 15744, 1095046225, 0, 15744, 1095046225, 0, 15744, 1095046225, 0, 15744, 1095046225, 0, 15744, 1095046225, 0, 22400, 68161538, 0, 22400, 68161538, 0, 22400, 68161538, 0, 22400, 68161538, 0, 22016, 134743232, 0, 22016, 134743232, 0, 22016, 134743232, 0, 22016, 134743232, 0, 22016, 134743232, 0, 13632, 268435456, 0, 13648, 268435456, 0, 15168, 65, 0, 15168, 65, 0, 15744, 1095046225, 0, 15744, 1095046225, 0, 15744, 
1095046225, 0, 15744, 1095046225, 0, 15744, 1095046225, 0, 15744, 1095046225, 0, 15744, 1095046225, 0, 15744, 1095046225, 0, 15744, 1095046225, 0, 15744, 1095046225, 0, 22400, 68161538, 0, 22400, 68161538, 0, 22400, 68161538, 0, 22400, 68161538, 0, 22016, 134743232, 0, 22016, 134743232, 0, 22016, 134743232, 0, 22016, 134743232, 0, 22016, 134743232, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369603322422164_104_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369603322422164_104_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..16f245db --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369603322422164_104_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,144 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 17)) || 
(WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6080, 1078001665, 0, 6080, 1078001665, 0, 6080, 1078001665, 0, 6080, 1078001665, 0, 6096, 1078001665, 0, 6096, 1078001665, 0, 6096, 1078001665, 0, 6096, 1078001665, 0, 6112, 1078001665, 0, 6112, 1078001665, 0, 6112, 1078001665, 0, 6112, 1078001665, 0, 7296, 20971520, 0, 7296, 20971520, 0, 7312, 20971520, 0, 7312, 20971520, 0, 7328, 20971520, 0, 7328, 20971520, 0, 6080, 1078001665, 0, 6080, 1078001665, 0, 6080, 1078001665, 0, 6080, 1078001665, 0, 6096, 1078001665, 0, 6096, 1078001665, 0, 6096, 1078001665, 0, 6096, 1078001665, 0, 6112, 1078001665, 0, 6112, 1078001665, 0, 6112, 1078001665, 0, 6112, 1078001665, 0, 7296, 20971520, 0, 7296, 20971520, 0, 7312, 20971520, 0, 7312, 20971520, 0, 7328, 20971520, 0, 7328, 20971520, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369836382998344_107_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369836382998344_107_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..745bc5af --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369836382998344_107_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,167 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27)) || 
(WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((84 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((107 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || 
(WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9600, 136445954, 0, 9600, 136445954, 0, 9600, 136445954, 0, 9600, 136445954, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 
9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9984, 1431655765, 0, 9600, 136445954, 0, 9600, 136445954, 0, 9600, 136445954, 0, 9600, 136445954, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369837071943425_108_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369837071943425_108_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3bde1dd2 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369837071943425_108_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,241 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 27))) { + switch ((WaveGetLaneIndex() % 4)) 
{ + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 29))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 20))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 22))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((243 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 
threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 642 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5568, 2048, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 
1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 15552, 545259648, 0, 15552, 545259648, 0, 15552, 545259648, 0, 15568, 545259648, 0, 15568, 545259648, 0, 15568, 545259648, 0, 15584, 545259648, 0, 15584, 545259648, 0, 15584, 545259648, 0, 16704, 2684354562, 0, 16704, 2684354562, 0, 16704, 2684354562, 0, 16720, 2684354562, 0, 16720, 2684354562, 0, 16720, 2684354562, 0, 16736, 2684354562, 0, 16736, 2684354562, 0, 16736, 2684354562, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5568, 2048, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13248, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13264, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13280, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13824, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 
1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13840, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 13856, 1430607185, 0, 15552, 545259648, 0, 15552, 545259648, 0, 15552, 545259648, 0, 15568, 545259648, 0, 15568, 545259648, 0, 15568, 545259648, 0, 15584, 545259648, 0, 15584, 545259648, 0, 15584, 545259648, 0, 16704, 2684354562, 0, 16704, 2684354562, 0, 16704, 2684354562, 0, 16720, 2684354562, 0, 16720, 2684354562, 0, 16720, 2684354562, 0, 16736, 2684354562, 0, 16736, 2684354562, 0, 16736, 2684354562, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369861559792378_109_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369861559792378_109_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..39f6f0cd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369861559792378_109_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,183 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 18))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 2)) { + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 354 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 3024, 1, 0, 3040, 1, 0, 5776, 286331153, 0, 5776, 286331153, 0, 5776, 286331153, 0, 5776, 286331153, 0, 5776, 286331153, 0, 5776, 286331153, 0, 5776, 286331153, 0, 5776, 286331153, 0, 5792, 286331153, 0, 5792, 286331153, 0, 5792, 286331153, 0, 5792, 286331153, 0, 5792, 286331153, 0, 5792, 286331153, 0, 5792, 286331153, 0, 5792, 286331153, 0, 6528, 286331153, 0, 6528, 286331153, 0, 6528, 286331153, 0, 6528, 286331153, 0, 6528, 286331153, 0, 6528, 286331153, 0, 6528, 286331153, 0, 6528, 286331153, 0, 6848, 1145324612, 0, 6848, 1145324612, 0, 6848, 1145324612, 0, 6848, 1145324612, 0, 6848, 1145324612, 0, 6848, 1145324612, 0, 6848, 1145324612, 0, 6848, 1145324612, 0, 7296, 838860, 0, 7296, 838860, 0, 7296, 838860, 0, 7296, 838860, 0, 7296, 838860, 0, 7296, 838860, 0, 7296, 838860, 0, 7296, 838860, 0, 7296, 838860, 0, 7296, 838860, 0, 576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 
1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 3024, 1, 0, 3040, 1, 0, 5776, 286331153, 0, 5776, 286331153, 0, 5776, 286331153, 0, 5776, 286331153, 0, 5776, 286331153, 0, 5776, 286331153, 0, 5776, 286331153, 0, 5776, 286331153, 0, 5792, 286331153, 0, 5792, 286331153, 0, 5792, 286331153, 0, 5792, 286331153, 0, 5792, 286331153, 0, 5792, 286331153, 0, 5792, 286331153, 0, 5792, 286331153, 0, 6528, 286331153, 0, 6528, 286331153, 0, 6528, 286331153, 0, 6528, 286331153, 0, 6528, 286331153, 0, 6528, 286331153, 0, 6528, 286331153, 0, 6528, 286331153, 0, 6848, 1145324612, 0, 6848, 1145324612, 0, 6848, 1145324612, 0, 6848, 1145324612, 0, 6848, 1145324612, 0, 6848, 1145324612, 0, 6848, 1145324612, 0, 6848, 1145324612, 0, 7296, 838860, 0, 7296, 838860, 0, 7296, 838860, 0, 7296, 838860, 0, 7296, 838860, 0, 7296, 838860, 0, 7296, 838860, 0, 7296, 838860, 0, 7296, 838860, 0, 7296, 838860, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369863682019226_110_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369863682019226_110_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ab8e1e5b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369863682019226_110_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,220 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: 
Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 642 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5200, 17, 0, 5200, 17, 0, 5216, 17, 0, 5216, 17, 0, 5232, 17, 0, 5232, 17, 0, 6096, 1145324612, 0, 6096, 1145324612, 0, 6096, 1145324612, 0, 6096, 1145324612, 0, 6096, 1145324612, 0, 6096, 1145324612, 0, 6096, 1145324612, 0, 6096, 1145324612, 0, 6112, 1145324612, 0, 6112, 1145324612, 0, 6112, 1145324612, 0, 6112, 1145324612, 0, 6112, 1145324612, 0, 6112, 1145324612, 0, 6112, 1145324612, 0, 6112, 1145324612, 0, 6128, 1145324612, 0, 6128, 1145324612, 0, 6128, 1145324612, 0, 6128, 1145324612, 0, 6128, 1145324612, 0, 6128, 1145324612, 0, 6128, 1145324612, 0, 6128, 1145324612, 0, 7376, 1430257665, 0, 7376, 1430257665, 0, 7376, 1430257665, 0, 7376, 1430257665, 0, 7376, 1430257665, 0, 7376, 1430257665, 0, 7392, 1430257665, 0, 7392, 1430257665, 0, 7392, 1430257665, 0, 7392, 1430257665, 0, 7392, 1430257665, 0, 7392, 1430257665, 0, 7408, 1430257665, 0, 7408, 1430257665, 0, 7408, 1430257665, 0, 7408, 1430257665, 0, 7408, 1430257665, 0, 7408, 1430257665, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 
7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 9408, 1023, 0, 9408, 1023, 0, 9408, 1023, 0, 9408, 1023, 0, 9408, 1023, 0, 9408, 1023, 0, 9408, 1023, 0, 9408, 1023, 0, 9408, 1023, 0, 9408, 1023, 0, 9152, 4194304, 0, 5200, 17, 0, 5200, 17, 0, 5216, 17, 0, 5216, 17, 0, 5232, 17, 0, 5232, 17, 0, 6096, 1145324612, 0, 6096, 1145324612, 0, 6096, 1145324612, 0, 6096, 1145324612, 0, 6096, 1145324612, 0, 6096, 1145324612, 0, 6096, 1145324612, 0, 6096, 1145324612, 0, 6112, 1145324612, 0, 6112, 1145324612, 0, 6112, 1145324612, 0, 6112, 1145324612, 0, 6112, 1145324612, 0, 6112, 1145324612, 0, 6112, 1145324612, 0, 6112, 1145324612, 0, 6128, 1145324612, 0, 6128, 1145324612, 0, 6128, 1145324612, 0, 6128, 1145324612, 0, 6128, 1145324612, 0, 6128, 1145324612, 0, 6128, 1145324612, 0, 6128, 1145324612, 0, 7376, 1430257665, 0, 7376, 1430257665, 0, 7376, 1430257665, 0, 7376, 1430257665, 0, 7376, 1430257665, 0, 7376, 1430257665, 0, 7392, 1430257665, 0, 7392, 1430257665, 0, 7392, 1430257665, 0, 7392, 1430257665, 0, 7392, 1430257665, 0, 7392, 1430257665, 0, 7408, 1430257665, 0, 7408, 1430257665, 0, 7408, 1430257665, 0, 7408, 1430257665, 0, 7408, 1430257665, 0, 7408, 1430257665, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7952, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 1431655765, 0, 7968, 
1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 7984, 1431655765, 0, 9408, 1023, 0, 9408, 1023, 0, 9408, 1023, 0, 9408, 1023, 0, 9408, 1023, 0, 9408, 1023, 0, 9408, 1023, 0, 9408, 1023, 0, 9408, 1023, 0, 9408, 1023, 0, 9152, 4194304, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369870109775627_111_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369870109775627_111_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b46d900b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369870109775627_111_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369878550159458_114_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369878550159458_114_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7ded85bb --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369878550159458_114_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,138 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 28)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 2432, 2290089984, 0, 2432, 2290089984, 0, 2432, 2290089984, 0, 2448, 2290089984, 0, 2448, 2290089984, 0, 2448, 2290089984, 0, 2464, 2290089984, 0, 2464, 2290089984, 0, 2464, 2290089984, 0, 3072, 2147483648, 0, 3088, 2147483648, 0, 3104, 2147483648, 0, 4544, 32768, 0, 4560, 32768, 0, 4576, 32768, 0, 576, 17, 0, 576, 17, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 2432, 2290089984, 0, 2432, 2290089984, 0, 2432, 2290089984, 0, 2448, 2290089984, 0, 2448, 2290089984, 0, 2448, 2290089984, 0, 2464, 2290089984, 0, 2464, 2290089984, 0, 2464, 2290089984, 0, 3072, 2147483648, 0, 3088, 2147483648, 0, 3104, 2147483648, 0, 4544, 32768, 0, 4560, 32768, 0, 4576, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + 
- Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369901682503000_116_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369901682503000_116_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..19e90a52 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369901682503000_116_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,220 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 28))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 31)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 25)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 26))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3536, 268435456, 0, 3552, 268435456, 0, 5136, 2080, 0, 5136, 2080, 0, 5152, 2080, 0, 5152, 2080, 0, 6464, 17, 0, 6464, 17, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3536, 268435456, 0, 3552, 268435456, 0, 5136, 2080, 0, 5136, 2080, 0, 5152, 2080, 0, 5152, 2080, 0, 6464, 17, 0, 6464, 17, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369902606412247_117_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369902606412247_117_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..12a1680a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369902606412247_117_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads 
+Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1792, 2415919250, 0, 1792, 2415919250, 0, 1792, 2415919250, 0, 1792, 2415919250, 0, 1792, 2415919250, 0, 1808, 2415919250, 0, 1808, 2415919250, 0, 1808, 2415919250, 0, 1808, 2415919250, 0, 1808, 2415919250, 0, 1824, 2415919250, 0, 1824, 2415919250, 0, 1824, 2415919250, 0, 1824, 2415919250, 0, 1824, 2415919250, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1792, 2415919250, 0, 1792, 2415919250, 0, 1792, 2415919250, 0, 1792, 2415919250, 0, 1792, 2415919250, 0, 1808, 2415919250, 0, 1808, 2415919250, 0, 1808, 2415919250, 0, 1808, 2415919250, 0, 1808, 2415919250, 0, 1824, 2415919250, 0, 1824, 2415919250, 0, 1824, 2415919250, 0, 1824, 2415919250, 0, 1824, 2415919250, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0, 2112, 3067833782, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - 
Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369938598972160_119_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369938598972160_119_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..efd97bba --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369938598972160_119_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,232 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
} + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 11)) { + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 372 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 7488, 838860, 0, 7488, 838860, 0, 7488, 838860, 0, 7488, 838860, 0, 7488, 838860, 0, 7488, 838860, 0, 7488, 838860, 0, 7488, 838860, 0, 7488, 838860, 0, 7488, 838860, 0, 8128, 2047, 0, 8128, 2047, 0, 8128, 2047, 0, 8128, 2047, 0, 8128, 2047, 0, 8128, 2047, 0, 8128, 2047, 0, 8128, 2047, 0, 8128, 2047, 0, 8128, 2047, 0, 8128, 2047, 0, 9088, 8, 0, 9104, 8, 0, 9120, 8, 0, 10240, 7, 0, 10240, 7, 0, 10240, 7, 0, 10256, 7, 0, 10256, 7, 0, 10256, 7, 0, 10272, 7, 0, 10272, 7, 0, 10272, 7, 0, 11072, 7, 0, 11072, 7, 0, 11072, 7, 0, 11088, 7, 0, 11088, 7, 0, 11088, 7, 0, 11104, 7, 0, 11104, 7, 0, 11104, 7, 0, 12608, 73, 0, 12608, 73, 0, 12608, 73, 0, 13184, 272696336, 0, 13184, 272696336, 0, 13184, 272696336, 0, 13184, 272696336, 0, 13184, 272696336, 0, 13504, 613566756, 0, 13504, 613566756, 0, 13504, 613566756, 0, 13504, 613566756, 0, 13504, 613566756, 0, 
13504, 613566756, 0, 13504, 613566756, 0, 13504, 613566756, 0, 13504, 613566756, 0, 13504, 613566756, 0, 576, 17, 0, 576, 17, 0, 7488, 838860, 0, 7488, 838860, 0, 7488, 838860, 0, 7488, 838860, 0, 7488, 838860, 0, 7488, 838860, 0, 7488, 838860, 0, 7488, 838860, 0, 7488, 838860, 0, 7488, 838860, 0, 8128, 2047, 0, 8128, 2047, 0, 8128, 2047, 0, 8128, 2047, 0, 8128, 2047, 0, 8128, 2047, 0, 8128, 2047, 0, 8128, 2047, 0, 8128, 2047, 0, 8128, 2047, 0, 8128, 2047, 0, 9088, 8, 0, 9104, 8, 0, 9120, 8, 0, 10240, 7, 0, 10240, 7, 0, 10240, 7, 0, 10256, 7, 0, 10256, 7, 0, 10256, 7, 0, 10272, 7, 0, 10272, 7, 0, 10272, 7, 0, 11072, 7, 0, 11072, 7, 0, 11072, 7, 0, 11088, 7, 0, 11088, 7, 0, 11088, 7, 0, 11104, 7, 0, 11104, 7, 0, 11104, 7, 0, 12608, 73, 0, 12608, 73, 0, 12608, 73, 0, 13184, 272696336, 0, 13184, 272696336, 0, 13184, 272696336, 0, 13184, 272696336, 0, 13184, 272696336, 0, 13504, 613566756, 0, 13504, 613566756, 0, 13504, 613566756, 0, 13504, 613566756, 0, 13504, 613566756, 0, 13504, 613566756, 0, 13504, 613566756, 0, 13504, 613566756, 0, 13504, 613566756, 0, 13504, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369958997168525_121_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369958997168525_121_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..66762e8b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369958997168525_121_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,212 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 28)) { + if ((WaveGetLaneIndex() == 17)) { + result = (result + 
WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter0 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 28))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1984, 73, 0, 1984, 73, 0, 1984, 73, 0, 6352, 2147483648, 0, 6368, 2147483648, 0, 6384, 2147483648, 0, 9408, 67108864, 0, 9424, 67108864, 0, 9440, 67108864, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1984, 73, 0, 1984, 73, 0, 1984, 73, 0, 6352, 
2147483648, 0, 6368, 2147483648, 0, 6384, 2147483648, 0, 9408, 67108864, 0, 9424, 67108864, 0, 9440, 67108864, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756369962993772950_122_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756369962993772950_122_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..592cbb4c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756369962993772950_122_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,266 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18))) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 18)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((132 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((141 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch 
for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 252 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 5184, 559240, 0, 5184, 559240, 0, 5184, 559240, 0, 5184, 559240, 0, 5184, 559240, 0, 6848, 8, 0, 8448, 16384, 0, 8452, 16384, 0, 8456, 16384, 0, 8464, 16384, 0, 8468, 16384, 0, 8472, 16384, 0, 8480, 16384, 0, 8484, 16384, 0, 8488, 16384, 0, 9024, 16384, 0, 9028, 16384, 0, 9032, 16384, 0, 9040, 16384, 0, 9044, 16384, 0, 9048, 16384, 0, 9056, 16384, 0, 9060, 16384, 0, 9064, 16384, 0, 10880, 73, 0, 10880, 73, 0, 10880, 73, 0, 11456, 272696336, 0, 11456, 272696336, 0, 11456, 272696336, 0, 11456, 272696336, 0, 11456, 272696336, 0, 576, 17, 0, 576, 17, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 5184, 559240, 0, 5184, 559240, 0, 5184, 559240, 0, 5184, 559240, 0, 5184, 559240, 0, 6848, 8, 0, 8448, 16384, 0, 8452, 16384, 0, 8456, 16384, 0, 8464, 16384, 0, 8468, 16384, 0, 8472, 16384, 0, 8480, 16384, 0, 8484, 16384, 0, 8488, 16384, 0, 9024, 16384, 0, 9028, 16384, 0, 9032, 16384, 0, 9040, 16384, 0, 9044, 16384, 0, 9048, 16384, 0, 9056, 16384, 0, 9060, 16384, 0, 9064, 16384, 0, 10880, 73, 0, 10880, 73, 0, 10880, 73, 0, 11456, 272696336, 0, 11456, 272696336, 0, 11456, 272696336, 0, 11456, 272696336, 0, 11456, 272696336, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 
+ - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756370026390167209_124_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756370026390167209_124_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4e68b897 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756370026390167209_124_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,168 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main 
+ DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 306 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 73, 0, 1792, 73, 0, 1792, 73, 0, 2368, 272696336, 0, 2368, 272696336, 0, 2368, 272696336, 0, 2368, 272696336, 0, 2368, 272696336, 0, 2688, 613566756, 0, 2688, 613566756, 0, 2688, 613566756, 0, 2688, 613566756, 0, 2688, 613566756, 0, 2688, 613566756, 0, 2688, 613566756, 0, 2688, 613566756, 0, 2688, 613566756, 0, 2688, 613566756, 0, 3584, 73, 0, 3584, 73, 0, 3584, 73, 0, 4160, 272696336, 0, 4160, 272696336, 0, 4160, 272696336, 0, 4160, 272696336, 0, 4160, 272696336, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 73, 0, 1792, 73, 0, 1792, 73, 0, 2368, 272696336, 0, 2368, 272696336, 0, 2368, 272696336, 0, 2368, 272696336, 0, 2368, 272696336, 0, 2688, 613566756, 0, 2688, 613566756, 0, 2688, 613566756, 0, 2688, 613566756, 0, 2688, 613566756, 0, 2688, 613566756, 0, 2688, 613566756, 0, 2688, 613566756, 0, 2688, 613566756, 0, 2688, 613566756, 0, 3584, 73, 0, 3584, 73, 0, 3584, 73, 0, 4160, 272696336, 0, 4160, 272696336, 0, 4160, 272696336, 0, 4160, 272696336, 0, 4160, 272696336, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 
3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0, 4480, 3067833782, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756370027547908638_125_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756370027547908638_125_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a8da8677 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756370027547908638_125_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,356 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 28)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 9)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 22))) { + if (((WaveGetLaneIndex() < 6) || 
(WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + } else { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 17)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 25)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((262 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2880, 1073741824, 0, 5120, 1207959561, 0, 5120, 1207959561, 0, 5120, 1207959561, 0, 5120, 1207959561, 0, 6928, 1090519105, 0, 6928, 1090519105, 0, 6928, 1090519105, 0, 6928, 1090519105, 0, 13632, 272696336, 0, 13632, 272696336, 0, 13632, 272696336, 0, 13632, 272696336, 0, 13632, 272696336, 0, 13952, 613566756, 0, 13952, 613566756, 0, 13952, 613566756, 0, 13952, 613566756, 0, 13952, 613566756, 0, 13952, 613566756, 0, 13952, 613566756, 0, 13952, 613566756, 0, 13952, 613566756, 0, 13952, 613566756, 0, 16208, 33554432, 0, 16224, 33554432, 0, 16784, 
33554432, 0, 16800, 33554432, 0, 17472, 131073, 0, 17472, 131073, 0, 2880, 1073741824, 0, 5120, 1207959561, 0, 5120, 1207959561, 0, 5120, 1207959561, 0, 5120, 1207959561, 0, 6928, 1090519105, 0, 6928, 1090519105, 0, 6928, 1090519105, 0, 6928, 1090519105, 0, 13632, 272696336, 0, 13632, 272696336, 0, 13632, 272696336, 0, 13632, 272696336, 0, 13632, 272696336, 0, 13952, 613566756, 0, 13952, 613566756, 0, 13952, 613566756, 0, 13952, 613566756, 0, 13952, 613566756, 0, 13952, 613566756, 0, 13952, 613566756, 0, 13952, 613566756, 0, 13952, 613566756, 0, 13952, 613566756, 0, 16208, 33554432, 0, 16224, 33554432, 0, 16784, 33554432, 0, 16800, 33554432, 0, 17472, 131073, 0, 17472, 131073, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756370216623291925_127_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756370216623291925_127_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..66434a2d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756370216623291925_127_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,128 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || 
(WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 2640, 4096, 0, 2656, 4096, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 1728, 4278190095, 0, 2640, 4096, 0, 2656, 4096, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756370217607289630_128_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756370217607289630_128_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..44b07ae8 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756370217607289630_128_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,200 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 27))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 11)) { + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i2 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((160 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((169 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + 
+#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3008, 16, 0, 3024, 16, 0, 3040, 16, 0, 5888, 286331153, 0, 5888, 286331153, 0, 5888, 286331153, 0, 5888, 286331153, 0, 5888, 286331153, 0, 5888, 286331153, 0, 5888, 286331153, 0, 5888, 286331153, 0, 5904, 286331153, 0, 5904, 286331153, 0, 5904, 286331153, 0, 5904, 286331153, 0, 5904, 286331153, 0, 5904, 286331153, 0, 5904, 286331153, 0, 5904, 286331153, 0, 5920, 286331153, 0, 5920, 286331153, 0, 5920, 286331153, 0, 5920, 286331153, 0, 5920, 286331153, 0, 5920, 286331153, 0, 5920, 286331153, 0, 5920, 286331153, 0, 11712, 559240, 0, 11712, 559240, 0, 11712, 559240, 0, 11712, 559240, 0, 11712, 559240, 0, 3008, 16, 0, 3024, 16, 0, 3040, 16, 0, 5888, 286331153, 0, 5888, 286331153, 0, 5888, 286331153, 0, 5888, 286331153, 0, 5888, 286331153, 0, 5888, 286331153, 0, 5888, 286331153, 0, 5888, 286331153, 0, 5904, 286331153, 0, 5904, 286331153, 0, 5904, 286331153, 0, 5904, 286331153, 0, 5904, 286331153, 0, 5904, 286331153, 0, 5904, 286331153, 0, 5904, 286331153, 0, 5920, 286331153, 0, 5920, 286331153, 0, 5920, 286331153, 0, 5920, 286331153, 0, 5920, 286331153, 0, 5920, 286331153, 0, 5920, 286331153, 0, 5920, 286331153, 0, 11712, 559240, 0, 11712, 559240, 0, 11712, 559240, 0, 11712, 559240, 0, 11712, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + 
Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756370442333031227_130_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756370442333031227_130_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b044a8af --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756370442333031227_130_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3584, 4227858447, 0, 3584, 4227858447, 0, 3584, 4227858447, 0, 3584, 4227858447, 0, 3584, 4227858447, 0, 3584, 4227858447, 0, 3584, 4227858447, 0, 3584, 4227858447, 0, 3584, 4227858447, 0, 3584, 4227858447, 0, 3200, 32768, 0, 2816, 112, 0, 2816, 112, 0, 2816, 112, 0, 2432, 44706432, 0, 2432, 44706432, 0, 2432, 44706432, 0, 2432, 44706432, 0, 2432, 44706432, 0, 2432, 44706432, 0, 2432, 44706432, 0, 2432, 44706432, 0, 2432, 44706432, 0, 3584, 4227858447, 0, 3584, 4227858447, 0, 3584, 4227858447, 0, 3584, 4227858447, 0, 3584, 4227858447, 0, 3584, 4227858447, 0, 3584, 4227858447, 0, 3584, 4227858447, 0, 3584, 4227858447, 0, 3584, 4227858447, 0, 3200, 32768, 0, 2816, 112, 0, 2816, 112, 0, 2816, 112, 0, 2432, 44706432, 0, 2432, 44706432, 0, 2432, 44706432, 0, 2432, 44706432, 0, 2432, 44706432, 0, 2432, 44706432, 0, 2432, 44706432, 0, 2432, 44706432, 0, 2432, 44706432, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756370448233999430_132_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756370448233999430_132_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e83289ab --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756370448233999430_132_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,128 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | 
(i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756370448519519461_133_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756370448519519461_133_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3b937cd6 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756370448519519461_133_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,92 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 19)) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 22))) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 4194304, 0, 1792, 4194304, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756370448672401116_134_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756370448672401116_134_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a86fbca4 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756370448672401116_134_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2624, 545267840, 0, 2624, 545267840, 0, 2624, 545267840, 0, 2624, 545267840, 0, 2368, 3222278208, 0, 2368, 3222278208, 0, 2368, 3222278208, 0, 2368, 3222278208, 0, 2368, 3222278208, 0, 2624, 545267840, 0, 2624, 545267840, 0, 2624, 545267840, 0, 2624, 545267840, 0, 2368, 3222278208, 0, 2368, 3222278208, 0, 2368, 3222278208, 0, 2368, 3222278208, 0, 2368, 3222278208, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756370449339524566_136_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756370449339524566_136_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..34e8c04a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756370449339524566_136_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,285 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 20)) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 16))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: 
UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6144, 1145324612, 0, 6144, 1145324612, 0, 6144, 1145324612, 0, 6144, 1145324612, 0, 6144, 1145324612, 0, 6144, 1145324612, 0, 6144, 1145324612, 0, 6144, 1145324612, 0, 6592, 559240, 0, 6592, 559240, 0, 6592, 559240, 0, 6592, 559240, 0, 6592, 559240, 0, 7488, 73, 0, 7488, 73, 0, 7488, 73, 0, 12608, 272696336, 0, 12608, 272696336, 0, 12608, 272696336, 0, 12608, 272696336, 0, 12608, 272696336, 0, 14144, 612368676, 0, 14144, 612368676, 0, 14144, 612368676, 0, 14144, 612368676, 0, 14144, 612368676, 0, 14144, 612368676, 0, 14160, 612368676, 0, 14160, 612368676, 0, 14160, 612368676, 0, 14160, 612368676, 0, 14160, 612368676, 0, 14160, 612368676, 0, 6144, 1145324612, 0, 6144, 1145324612, 0, 6144, 1145324612, 0, 6144, 1145324612, 0, 6144, 1145324612, 0, 6144, 1145324612, 0, 6144, 1145324612, 0, 6144, 1145324612, 0, 6592, 559240, 0, 6592, 559240, 0, 6592, 559240, 0, 6592, 559240, 0, 6592, 559240, 0, 7488, 73, 0, 7488, 73, 0, 7488, 73, 0, 12608, 272696336, 0, 12608, 272696336, 0, 12608, 272696336, 0, 12608, 272696336, 0, 12608, 272696336, 0, 14144, 612368676, 0, 14144, 612368676, 0, 14144, 612368676, 0, 14144, 612368676, 0, 14144, 612368676, 0, 14144, 612368676, 0, 14160, 612368676, 0, 14160, 612368676, 0, 14160, 612368676, 0, 14160, 612368676, 0, 14160, 612368676, 0, 14160, 612368676, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756370817277616777_138_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756370817277616777_138_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2c30bdf4 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756370817277616777_138_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 2816, 524288, 0, 2432, 2684354730, 0, 2432, 2684354730, 0, 2432, 2684354730, 0, 2432, 2684354730, 0, 2432, 2684354730, 0, 2432, 2684354730, 0, 2176, 2097152, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 3200, 1431655765, 0, 2816, 524288, 0, 2432, 2684354730, 0, 2432, 2684354730, 0, 2432, 2684354730, 0, 2432, 2684354730, 0, 2432, 2684354730, 0, 2432, 2684354730, 0, 2176, 2097152, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756370823946756944_140_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756370823946756944_140_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b73de56b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756370823946756944_140_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,308 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 18)) { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30)) || 
(WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((203 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 16)) || 
(WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((270 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((291 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((306 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 444 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3584, 268501008, 0, 3584, 268501008, 0, 3584, 268501008, 0, 7936, 286331153, 0, 7936, 286331153, 0, 7936, 286331153, 0, 7936, 286331153, 0, 7936, 286331153, 0, 7936, 
286331153, 0, 7936, 286331153, 0, 7936, 286331153, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 9600, 17, 0, 9600, 17, 0, 14464, 1145324612, 0, 14464, 1145324612, 0, 14464, 1145324612, 0, 14464, 1145324612, 0, 14464, 1145324612, 0, 14464, 1145324612, 0, 14464, 1145324612, 0, 14464, 1145324612, 0, 18644, 8388608, 0, 18648, 8388608, 0, 18652, 8388608, 0, 18660, 8388608, 0, 18664, 8388608, 0, 18668, 8388608, 0, 18676, 8388608, 0, 18680, 8388608, 0, 18684, 8388608, 0, 3584, 268501008, 0, 3584, 268501008, 0, 3584, 268501008, 0, 7936, 286331153, 0, 7936, 286331153, 0, 7936, 286331153, 0, 7936, 286331153, 0, 7936, 286331153, 0, 7936, 286331153, 0, 7936, 286331153, 0, 7936, 286331153, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 2004318071, 0, 8256, 
2004318071, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 8704, 1048575, 0, 9600, 17, 0, 9600, 17, 0, 14464, 1145324612, 0, 14464, 1145324612, 0, 14464, 1145324612, 0, 14464, 1145324612, 0, 14464, 1145324612, 0, 14464, 1145324612, 0, 14464, 1145324612, 0, 14464, 1145324612, 0, 18644, 8388608, 0, 18648, 8388608, 0, 18652, 8388608, 0, 18660, 8388608, 0, 18664, 8388608, 0, 18668, 8388608, 0, 18676, 8388608, 0, 18680, 8388608, 0, 18684, 8388608, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756370918727039541_142_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756370918727039541_142_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d09b1aca --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756370918727039541_142_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,486 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((33 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 16)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 22))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 
1); + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 16)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 22))) { + result = 
(result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((294 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (303 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (324 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (329 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (339 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((357 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + if ((i5 == 2)) { + break; + } + } + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (372 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (377 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (384 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (388 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (399 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads 
+Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 294 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 64, 0, 1232, 64, 0, 1248, 64, 0, 3204, 32768, 0, 3208, 32768, 0, 3212, 32768, 0, 3220, 32768, 0, 3224, 32768, 0, 3228, 32768, 0, 3236, 32768, 0, 3240, 32768, 0, 3244, 32768, 0, 3648, 64, 0, 3664, 64, 0, 3680, 64, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 10752, 2147483649, 0, 10752, 2147483649, 0, 11392, 73, 0, 11392, 73, 0, 11392, 73, 0, 13376, 36, 0, 13376, 36, 0, 14080, 65536, 0, 16320, 4718592, 0, 16320, 4718592, 0, 16960, 17, 0, 16960, 17, 0, 20736, 537002016, 0, 20736, 537002016, 0, 20736, 537002016, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 24576, 32768, 0, 1216, 64, 0, 1232, 64, 0, 1248, 64, 0, 3204, 32768, 0, 3208, 32768, 0, 3212, 32768, 0, 3220, 32768, 0, 3224, 32768, 0, 3228, 32768, 0, 3236, 32768, 0, 3240, 32768, 0, 3244, 32768, 0, 3648, 64, 0, 3664, 64, 0, 3680, 64, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 10752, 2147483649, 0, 10752, 2147483649, 0, 11392, 73, 0, 11392, 73, 0, 11392, 73, 0, 13376, 36, 0, 13376, 36, 0, 14080, 65536, 0, 16320, 4718592, 0, 16320, 4718592, 0, 16960, 17, 0, 16960, 17, 0, 20736, 537002016, 0, 20736, 537002016, 0, 20736, 537002016, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 24576, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756371220282350345_144_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756371220282350345_144_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e6650dbb --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756371220282350345_144_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,200 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 600 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1872, 63, 0, 1872, 63, 0, 1872, 63, 0, 1872, 63, 0, 1872, 63, 0, 1872, 63, 0, 1888, 63, 0, 1888, 63, 0, 1888, 63, 0, 1888, 63, 0, 1888, 
63, 0, 1888, 63, 0, 1904, 63, 0, 1904, 63, 0, 1904, 63, 0, 1904, 63, 0, 1904, 63, 0, 1904, 63, 0, 2960, 21, 0, 2960, 21, 0, 2960, 21, 0, 2964, 21, 0, 2964, 21, 0, 2964, 21, 0, 2968, 21, 0, 2968, 21, 0, 2968, 21, 0, 2976, 21, 0, 2976, 21, 0, 2976, 21, 0, 2980, 21, 0, 2980, 21, 0, 2980, 21, 0, 2984, 21, 0, 2984, 21, 0, 2984, 21, 0, 2992, 21, 0, 2992, 21, 0, 2992, 21, 0, 2996, 21, 0, 2996, 21, 0, 2996, 21, 0, 3000, 21, 0, 3000, 21, 0, 3000, 21, 0, 3600, 9, 0, 3600, 9, 0, 3604, 9, 0, 3604, 9, 0, 3608, 9, 0, 3608, 9, 0, 3616, 9, 0, 3616, 9, 0, 3620, 9, 0, 3620, 9, 0, 3624, 9, 0, 3624, 9, 0, 3632, 9, 0, 3632, 9, 0, 3636, 9, 0, 3636, 9, 0, 3640, 9, 0, 3640, 9, 0, 4176, 16, 0, 4180, 16, 0, 4184, 16, 0, 4192, 16, 0, 4196, 16, 0, 4200, 16, 0, 4208, 16, 0, 4212, 16, 0, 4216, 16, 0, 4496, 36, 0, 4496, 36, 0, 4500, 36, 0, 4500, 36, 0, 4504, 36, 0, 4504, 36, 0, 4512, 36, 0, 4512, 36, 0, 4516, 36, 0, 4516, 36, 0, 4520, 36, 0, 4520, 36, 0, 4528, 36, 0, 4528, 36, 0, 4532, 36, 0, 4532, 36, 0, 4536, 36, 0, 4536, 36, 0, 5568, 17, 0, 5568, 17, 0, 7104, 67108864, 0, 7120, 67108864, 0, 7136, 67108864, 0, 7552, 559240, 0, 7552, 559240, 0, 7552, 559240, 0, 7552, 559240, 0, 7552, 559240, 0, 1872, 63, 0, 1872, 63, 0, 1872, 63, 0, 1872, 63, 0, 1872, 63, 0, 1872, 63, 0, 1888, 63, 0, 1888, 63, 0, 1888, 63, 0, 1888, 63, 0, 1888, 63, 0, 1888, 63, 0, 1904, 63, 0, 1904, 63, 0, 1904, 63, 0, 1904, 63, 0, 1904, 63, 0, 1904, 63, 0, 2960, 21, 0, 2960, 21, 0, 2960, 21, 0, 2964, 21, 0, 2964, 21, 0, 2964, 21, 0, 2968, 21, 0, 2968, 21, 0, 2968, 21, 0, 2976, 21, 0, 2976, 21, 0, 2976, 21, 0, 2980, 21, 0, 2980, 21, 0, 2980, 21, 0, 2984, 21, 0, 2984, 21, 0, 2984, 21, 0, 2992, 21, 0, 2992, 21, 0, 2992, 21, 0, 2996, 21, 0, 2996, 21, 0, 2996, 21, 0, 3000, 21, 0, 3000, 21, 0, 3000, 21, 0, 3600, 9, 0, 3600, 9, 0, 3604, 9, 0, 3604, 9, 0, 3608, 9, 0, 3608, 9, 0, 3616, 9, 0, 3616, 9, 0, 3620, 9, 0, 3620, 9, 0, 3624, 9, 0, 3624, 9, 0, 3632, 9, 0, 3632, 9, 0, 3636, 9, 0, 3636, 9, 0, 3640, 9, 0, 3640, 9, 0, 4176, 16, 0, 
4180, 16, 0, 4184, 16, 0, 4192, 16, 0, 4196, 16, 0, 4200, 16, 0, 4208, 16, 0, 4212, 16, 0, 4216, 16, 0, 4496, 36, 0, 4496, 36, 0, 4500, 36, 0, 4500, 36, 0, 4504, 36, 0, 4504, 36, 0, 4512, 36, 0, 4512, 36, 0, 4516, 36, 0, 4516, 36, 0, 4520, 36, 0, 4520, 36, 0, 4528, 36, 0, 4528, 36, 0, 4532, 36, 0, 4532, 36, 0, 4536, 36, 0, 4536, 36, 0, 5568, 17, 0, 5568, 17, 0, 7104, 67108864, 0, 7120, 67108864, 0, 7136, 67108864, 0, 7552, 559240, 0, 7552, 559240, 0, 7552, 559240, 0, 7552, 559240, 0, 7552, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756371311136357692_148_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756371311136357692_148_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..509706f7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756371311136357692_148_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,142 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 20)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 11)) { 
+ if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 318 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 272696336, 0, 1856, 272696336, 0, 1856, 272696336, 0, 1856, 272696336, 0, 1856, 272696336, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 
2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 1856, 272696336, 0, 1856, 272696336, 0, 1856, 272696336, 0, 1856, 272696336, 0, 1856, 272696336, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4416, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4432, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0, 4448, 2863311530, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756371312350010380_149_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756371312350010380_149_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ca21660a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756371312350010380_149_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,241 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 14))) { + if ((((((WaveGetLaneIndex() == 
3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 21))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 2048, 268435456, 0, 4608, 1048576, 0, 8064, 2, 0, 8080, 2, 0, 8096, 2, 0, 8384, 1145324612, 0, 8384, 1145324612, 0, 8384, 1145324612, 0, 8384, 1145324612, 0, 8384, 1145324612, 0, 8384, 1145324612, 0, 8384, 1145324612, 0, 8384, 1145324612, 0, 10176, 2281701376, 0, 10176, 2281701376, 0, 10816, 8, 0, 11712, 8388608, 0, 12160, 136, 0, 12160, 136, 0, 768, 1, 0, 2048, 268435456, 0, 4608, 1048576, 0, 8064, 2, 0, 8080, 2, 0, 8096, 2, 0, 8384, 1145324612, 0, 8384, 1145324612, 0, 8384, 1145324612, 0, 8384, 1145324612, 0, 8384, 1145324612, 0, 8384, 1145324612, 0, 8384, 1145324612, 0, 8384, 1145324612, 0, 10176, 2281701376, 0, 10176, 2281701376, 0, 10816, 8, 0, 11712, 8388608, 0, 12160, 136, 0, 12160, 136, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756371318340220766_150_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756371318340220766_150_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1c8456d1 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756371318340220766_150_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,294 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() < 11)) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 1)) { + break; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 21)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + 
break; + } + } + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 24)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 16)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 13)) { + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 4369, 0, 1088, 4369, 0, 1088, 4369, 0, 1088, 4369, 0, 1104, 4369, 0, 1104, 4369, 0, 1104, 4369, 0, 1104, 4369, 0, 3012, 17, 0, 3012, 17, 0, 3028, 17, 0, 3028, 17, 0, 6160, 570425344, 0, 6160, 570425344, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 1088, 4369, 0, 1088, 4369, 0, 1088, 4369, 0, 1088, 4369, 0, 1104, 4369, 0, 1104, 4369, 0, 1104, 4369, 0, 1104, 4369, 0, 3012, 17, 0, 3012, 17, 0, 3028, 17, 0, 3028, 17, 0, 6160, 570425344, 0, 6160, 570425344, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756371431403172770_152_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756371431403172770_152_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d5d839c9 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756371431403172770_152_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,234 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((33 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 22)) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 20))) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 20)) || 
(WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((77 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((90 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 1))) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 24))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 390 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 
1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 2132, 4195328, 0, 2132, 4195328, 0, 2136, 4195328, 0, 2136, 4195328, 0, 2148, 4195328, 0, 2148, 4195328, 0, 2152, 4195328, 0, 2152, 4195328, 0, 5780, 8388672, 0, 5780, 8388672, 0, 5784, 8388672, 0, 5784, 8388672, 0, 5796, 8388672, 0, 5796, 8388672, 0, 5800, 8388672, 0, 5800, 8388672, 0, 10304, 17039361, 0, 10304, 17039361, 0, 10304, 17039361, 0, 10320, 17039361, 0, 10320, 17039361, 0, 10320, 17039361, 0, 10336, 17039361, 0, 10336, 17039361, 0, 10336, 17039361, 0, 11264, 262144, 0, 11280, 262144, 0, 11296, 262144, 0, 11840, 272696336, 0, 11840, 272696336, 0, 11840, 272696336, 0, 11840, 272696336, 0, 11840, 272696336, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 976, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 992, 1431655765, 0, 2132, 4195328, 0, 2132, 4195328, 0, 2136, 4195328, 0, 2136, 4195328, 0, 2148, 4195328, 0, 2148, 4195328, 0, 2152, 4195328, 0, 2152, 4195328, 0, 5780, 8388672, 0, 5780, 8388672, 0, 5784, 8388672, 0, 5784, 8388672, 0, 5796, 8388672, 0, 5796, 8388672, 0, 5800, 8388672, 0, 5800, 8388672, 0, 10304, 17039361, 0, 10304, 17039361, 0, 10304, 17039361, 0, 10320, 17039361, 0, 10320, 17039361, 0, 10320, 17039361, 0, 10336, 17039361, 0, 10336, 17039361, 0, 10336, 17039361, 0, 11264, 262144, 0, 11280, 
262144, 0, 11296, 262144, 0, 11840, 272696336, 0, 11840, 272696336, 0, 11840, 272696336, 0, 11840, 272696336, 0, 11840, 272696336, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756371459410563075_154_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756371459410563075_154_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a5faaf1a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756371459410563075_154_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,114 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 29))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 768, 65, 0, 768, 65, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + 
Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756371459711264293_155_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756371459711264293_155_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9d2478ea --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756371459711264293_155_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,287 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + 
break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || 
(WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 29))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 2: { + if 
(true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 21))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + 
Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1216, 16, 0, 2112, 613566756, 0, 2112, 613566756, 0, 2112, 613566756, 0, 2112, 613566756, 0, 2112, 613566756, 0, 2112, 613566756, 0, 2112, 613566756, 0, 2112, 613566756, 0, 2112, 613566756, 0, 2112, 613566756, 0, 4672, 17, 0, 4672, 17, 0, 4688, 17, 0, 4688, 17, 0, 4704, 17, 0, 4704, 17, 0, 7296, 16, 0, 7312, 16, 0, 7328, 16, 0, 11200, 536870912, 0, 11520, 1145324612, 0, 11520, 1145324612, 0, 11520, 1145324612, 0, 11520, 1145324612, 0, 11520, 1145324612, 0, 11520, 1145324612, 0, 11520, 1145324612, 0, 11520, 1145324612, 0, 12160, 8, 0, 13632, 8390656, 0, 13632, 8390656, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1216, 16, 0, 2112, 613566756, 0, 2112, 613566756, 0, 2112, 613566756, 0, 2112, 613566756, 0, 2112, 613566756, 0, 2112, 613566756, 0, 2112, 613566756, 0, 2112, 613566756, 0, 2112, 613566756, 0, 2112, 613566756, 0, 4672, 17, 0, 4672, 17, 0, 4688, 17, 0, 4688, 17, 0, 4704, 17, 0, 4704, 17, 0, 7296, 16, 0, 7312, 16, 0, 7328, 16, 0, 11200, 536870912, 0, 11520, 1145324612, 0, 11520, 1145324612, 0, 11520, 1145324612, 0, 11520, 1145324612, 0, 11520, 1145324612, 0, 11520, 1145324612, 0, 11520, 1145324612, 0, 11520, 1145324612, 0, 12160, 8, 0, 13632, 8390656, 0, 13632, 8390656, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756371468925248529_156_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756371468925248529_156_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3d9383d1 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756371468925248529_156_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,142 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 29))) { 
+ result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 
1472, 1145324612, 0, 1472, 1145324612, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756371469473660183_157_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756371469473660183_157_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cba9241a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756371469473660183_157_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,342 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 29))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 23))) { + result = (result + 
WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 
4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((134 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() 
>= 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((218 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 28)) { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (i4 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 26))) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((332 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((355 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((369 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (388 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (402 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (411 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (418 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 396 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3392, 64, 0, 3408, 64, 0, 3424, 64, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 
1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 5568, 32, 0, 7504, 131592, 0, 7504, 131592, 0, 7504, 131592, 0, 7520, 131592, 0, 7520, 131592, 0, 7520, 131592, 0, 7536, 131592, 0, 7536, 131592, 0, 7536, 131592, 0, 8592, 2048, 0, 8596, 2048, 0, 8600, 2048, 0, 8608, 2048, 0, 8612, 2048, 0, 8616, 2048, 0, 8624, 2048, 0, 8628, 2048, 0, 8632, 2048, 0, 10192, 524290, 0, 10192, 524290, 0, 10208, 524290, 0, 10208, 524290, 0, 10224, 524290, 0, 10224, 524290, 0, 10880, 8388608, 0, 13248, 17, 0, 13248, 17, 0, 13264, 17, 0, 13264, 17, 0, 13952, 17, 0, 13952, 17, 0, 13968, 17, 0, 13968, 17, 0, 15168, 286331153, 0, 15168, 286331153, 0, 15168, 286331153, 0, 15168, 286331153, 0, 15168, 286331153, 0, 15168, 286331153, 0, 15168, 286331153, 0, 15168, 286331153, 0, 26752, 559240, 0, 26752, 559240, 0, 26752, 559240, 0, 26752, 559240, 0, 26752, 559240, 0, 3392, 64, 0, 3408, 64, 0, 3424, 64, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 4864, 1431655765, 0, 5568, 32, 0, 7504, 131592, 0, 7504, 131592, 0, 7504, 131592, 0, 7520, 131592, 0, 7520, 131592, 0, 7520, 131592, 0, 7536, 131592, 0, 7536, 131592, 0, 7536, 131592, 0, 8592, 2048, 0, 8596, 2048, 0, 8600, 2048, 0, 8608, 2048, 0, 8612, 2048, 0, 8616, 2048, 0, 8624, 2048, 0, 8628, 2048, 0, 8632, 2048, 0, 10192, 524290, 0, 10192, 524290, 0, 10208, 524290, 0, 10208, 524290, 0, 10224, 524290, 0, 10224, 524290, 0, 10880, 8388608, 0, 13248, 17, 0, 13248, 17, 0, 13264, 17, 0, 13264, 17, 0, 13952, 17, 0, 13952, 17, 0, 13968, 17, 0, 13968, 17, 0, 15168, 286331153, 0, 15168, 286331153, 0, 15168, 286331153, 0, 15168, 286331153, 0, 15168, 286331153, 0, 15168, 286331153, 
0, 15168, 286331153, 0, 15168, 286331153, 0, 26752, 559240, 0, 26752, 559240, 0, 26752, 559240, 0, 26752, 559240, 0, 26752, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756371529313072926_159_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756371529313072926_159_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..925ec1e1 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756371529313072926_159_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,138 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 21))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result 
+ WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 228 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1664, 1342177280, 0, 1664, 1342177280, 0, 1680, 1342177280, 0, 1680, 1342177280, 0, 2240, 1430257749, 0, 2240, 1430257749, 0, 2240, 1430257749, 0, 2240, 1430257749, 0, 2240, 1430257749, 0, 2240, 1430257749, 0, 2240, 1430257749, 0, 2240, 1430257749, 0, 2240, 1430257749, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 5008, 8192, 0, 5024, 8192, 0, 7248, 139264, 0, 7248, 139264, 0, 7264, 139264, 0, 7264, 139264, 0, 8192, 262144, 0, 1664, 1342177280, 0, 1664, 1342177280, 0, 1680, 1342177280, 0, 1680, 1342177280, 0, 2240, 1430257749, 0, 2240, 1430257749, 0, 2240, 1430257749, 0, 2240, 1430257749, 0, 2240, 1430257749, 0, 2240, 1430257749, 0, 2240, 1430257749, 0, 2240, 1430257749, 0, 2240, 1430257749, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 
3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 3072, 4292870271, 0, 5008, 8192, 0, 5024, 8192, 0, 7248, 139264, 0, 7248, 139264, 0, 7264, 139264, 0, 7264, 139264, 0, 8192, 262144, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756371530578448289_160_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756371530578448289_160_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..49dfe45d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756371530578448289_160_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,323 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 6) || 
(WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 27))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; 
+ } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((191 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 17)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 22))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((265 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((274 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2880, 8192, 0, 2896, 8192, 0, 2912, 8192, 0, 4800, 
2, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 11200, 33562626, 0, 11200, 33562626, 0, 11200, 33562626, 0, 13824, 570425890, 0, 13824, 570425890, 0, 13824, 570425890, 0, 13824, 570425890, 0, 13824, 570425890, 0, 16980, 2281701376, 0, 16980, 2281701376, 0, 16984, 2281701376, 0, 16984, 2281701376, 0, 16996, 2281701376, 0, 16996, 2281701376, 0, 17000, 2281701376, 0, 17000, 2281701376, 0, 2880, 8192, 0, 2896, 8192, 0, 2912, 8192, 0, 4800, 2, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 4416, 2863311528, 0, 11200, 33562626, 0, 11200, 33562626, 0, 11200, 33562626, 0, 13824, 570425890, 0, 13824, 570425890, 0, 13824, 570425890, 0, 13824, 570425890, 0, 13824, 570425890, 0, 16980, 2281701376, 0, 16980, 2281701376, 0, 16984, 2281701376, 0, 16984, 2281701376, 0, 16996, 2281701376, 0, 16996, 2281701376, 0, 17000, 2281701376, 0, 17000, 2281701376, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756371820039128416_163_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756371820039128416_163_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c0b2f679 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756371820039128416_163_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,105 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756371879063523252_165_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756371879063523252_165_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0d327684 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756371879063523252_165_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,257 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 30)) 
{ + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24))) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 21)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 168 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4288, 134250504, 0, 4288, 134250504, 0, 4288, 134250504, 
0, 11584, 1, 0, 11600, 1, 0, 11616, 1, 0, 13248, 1, 0, 13264, 1, 0, 13280, 1, 0, 17088, 138446912, 0, 17088, 138446912, 0, 17088, 138446912, 0, 17088, 138446912, 0, 17088, 138446912, 0, 16832, 3221225473, 0, 16832, 3221225473, 0, 16832, 3221225473, 0, 16576, 933232640, 0, 16576, 933232640, 0, 16576, 933232640, 0, 16576, 933232640, 0, 16576, 933232640, 0, 16576, 933232640, 0, 16576, 933232640, 0, 16320, 8320, 0, 16320, 8320, 0, 576, 17, 0, 576, 17, 0, 4288, 134250504, 0, 4288, 134250504, 0, 4288, 134250504, 0, 11584, 1, 0, 11600, 1, 0, 11616, 1, 0, 13248, 1, 0, 13264, 1, 0, 13280, 1, 0, 17088, 138446912, 0, 17088, 138446912, 0, 17088, 138446912, 0, 17088, 138446912, 0, 17088, 138446912, 0, 16832, 3221225473, 0, 16832, 3221225473, 0, 16832, 3221225473, 0, 16576, 933232640, 0, 16576, 933232640, 0, 16576, 933232640, 0, 16576, 933232640, 0, 16576, 933232640, 0, 16576, 933232640, 0, 16576, 933232640, 0, 16320, 8320, 0, 16320, 8320, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756371881371656393_166_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756371881371656393_166_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c5cfd187 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756371881371656393_166_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,218 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + if ((WaveGetLaneIndex() == 16)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 3968, 16781313, 0, 3968, 16781313, 0, 3968, 16781313, 0, 3456, 1409286148, 0, 3456, 1409286148, 0, 3456, 1409286148, 0, 3456, 1409286148, 0, 3072, 1310720, 0, 3072, 1310720, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 4224, 2863311530, 0, 3968, 16781313, 0, 3968, 16781313, 0, 3968, 16781313, 0, 3456, 1409286148, 0, 3456, 1409286148, 0, 3456, 1409286148, 0, 3456, 1409286148, 0, 3072, 1310720, 0, 3072, 1310720, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756371881773533295_167_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756371881773533295_167_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8087c11d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756371881773533295_167_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,229 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 24))) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (i0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 4))) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 19)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 648 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 
2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 6980, 2155872768, 0, 6980, 2155872768, 0, 6980, 2155872768, 0, 6984, 2155872768, 0, 6984, 2155872768, 0, 6984, 2155872768, 0, 6988, 2155872768, 0, 6988, 2155872768, 0, 6988, 2155872768, 0, 6996, 2155872768, 0, 6996, 2155872768, 0, 6996, 2155872768, 0, 7000, 2155872768, 0, 7000, 2155872768, 0, 7000, 2155872768, 0, 7004, 2155872768, 0, 7004, 2155872768, 0, 7004, 2155872768, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 11072, 20, 0, 11072, 20, 0, 12160, 16781312, 0, 12160, 16781312, 0, 13376, 1409286209, 0, 13376, 1409286209, 0, 13376, 1409286209, 0, 13376, 1409286209, 0, 13376, 1409286209, 0, 13392, 1409286209, 0, 13392, 1409286209, 0, 13392, 1409286209, 0, 13392, 1409286209, 0, 13392, 1409286209, 0, 13408, 1409286209, 0, 13408, 1409286209, 0, 13408, 1409286209, 0, 13408, 1409286209, 0, 13408, 1409286209, 0, 14080, 1073741825, 0, 14080, 1073741825, 0, 14096, 1073741825, 0, 14096, 1073741825, 0, 14112, 1073741825, 0, 14112, 1073741825, 0, 15488, 256, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 
2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 6980, 2155872768, 0, 6980, 2155872768, 0, 6980, 2155872768, 0, 6984, 2155872768, 0, 6984, 2155872768, 0, 6984, 2155872768, 0, 6988, 2155872768, 0, 6988, 2155872768, 0, 6988, 2155872768, 0, 6996, 2155872768, 0, 6996, 2155872768, 0, 6996, 2155872768, 0, 7000, 2155872768, 0, 7000, 2155872768, 0, 7000, 2155872768, 0, 7004, 2155872768, 0, 7004, 2155872768, 0, 7004, 2155872768, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7680, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 7696, 1431655765, 0, 11072, 20, 0, 11072, 20, 0, 12160, 16781312, 0, 12160, 16781312, 0, 13376, 1409286209, 0, 13376, 1409286209, 0, 13376, 1409286209, 0, 13376, 1409286209, 0, 13376, 1409286209, 0, 13392, 1409286209, 0, 13392, 1409286209, 0, 13392, 1409286209, 0, 13392, 1409286209, 0, 13392, 1409286209, 0, 13408, 1409286209, 0, 13408, 1409286209, 0, 13408, 1409286209, 0, 13408, 1409286209, 0, 13408, 1409286209, 0, 14080, 1073741825, 0, 14080, 1073741825, 0, 14096, 1073741825, 0, 14096, 1073741825, 0, 14112, 1073741825, 0, 14112, 1073741825, 0, 15488, 256, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 
+ Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756371890905283688_168_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756371890905283688_168_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..be3dcf1a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756371890905283688_168_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,149 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((75 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 2147483663, 0, 1088, 2147483663, 0, 1088, 2147483663, 0, 1088, 2147483663, 0, 1088, 2147483663, 0, 4804, 8, 0, 4808, 8, 0, 4820, 8, 0, 4824, 8, 0, 6784, 4, 0, 7872, 3221225487, 0, 7872, 3221225487, 0, 7872, 3221225487, 0, 7872, 3221225487, 0, 7872, 3221225487, 0, 7872, 3221225487, 0, 1088, 2147483663, 0, 1088, 2147483663, 0, 1088, 2147483663, 0, 1088, 2147483663, 0, 1088, 2147483663, 0, 4804, 8, 0, 4808, 8, 0, 4820, 8, 0, 4824, 8, 0, 6784, 4, 0, 7872, 3221225487, 0, 7872, 3221225487, 0, 7872, 3221225487, 0, 7872, 3221225487, 0, 7872, 3221225487, 0, 7872, 3221225487, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756371903598212373_170_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756371903598212373_170_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c155b56b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756371903598212373_170_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,148 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17))) { + result = 
(result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 31))) { + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4224, 4227858432, 0, 4224, 4227858432, 0, 4224, 4227858432, 0, 4224, 4227858432, 0, 4224, 4227858432, 0, 4224, 4227858432, 0, 3968, 33688128, 0, 3968, 33688128, 0, 3968, 33688128, 0, 3968, 33688128, 0, 3968, 33688128, 0, 3584, 4210692, 0, 3584, 4210692, 0, 3584, 4210692, 0, 3328, 8, 0, 3072, 435, 0, 3072, 435, 0, 3072, 435, 0, 3072, 435, 0, 3072, 435, 0, 3072, 435, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 4224, 4227858432, 0, 4224, 4227858432, 0, 4224, 4227858432, 0, 4224, 4227858432, 0, 4224, 4227858432, 0, 4224, 4227858432, 0, 3968, 33688128, 0, 3968, 33688128, 0, 3968, 33688128, 0, 3968, 33688128, 0, 3968, 33688128, 0, 3584, 4210692, 0, 3584, 4210692, 0, 3584, 4210692, 0, 3328, 8, 0, 3072, 435, 0, 3072, 435, 0, 3072, 435, 0, 3072, 435, 0, 3072, 435, 0, 3072, 435, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756371993609781434_173_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756371993609781434_173_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..32c91efe --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756371993609781434_173_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,196 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 25))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28)) || 
(WaveGetLaneIndex() == 16))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 23))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1728, 570425346, 0, 
1728, 570425346, 0, 1728, 570425346, 0, 4864, 536870914, 0, 4864, 536870914, 0, 5184, 1145324612, 0, 5184, 1145324612, 0, 5184, 1145324612, 0, 5184, 1145324612, 0, 5184, 1145324612, 0, 5184, 1145324612, 0, 5184, 1145324612, 0, 5184, 1145324612, 0, 6720, 68, 0, 6720, 68, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 10176, 2147483648, 0, 576, 17, 0, 576, 17, 0, 1728, 570425346, 0, 1728, 570425346, 0, 1728, 570425346, 0, 4864, 536870914, 0, 4864, 536870914, 0, 5184, 1145324612, 0, 5184, 1145324612, 0, 5184, 1145324612, 0, 5184, 1145324612, 0, 5184, 1145324612, 0, 5184, 1145324612, 0, 5184, 1145324612, 0, 5184, 1145324612, 0, 6720, 68, 0, 6720, 68, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 10176, 2147483648, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756372041677033583_176_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756372041677033583_176_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bebbb932 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756372041677033583_176_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,161 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((78 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((83 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((87 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((i1 == 1)) { + continue; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single 
dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 288 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 5328, 613566756, 0, 5328, 613566756, 0, 5328, 613566756, 0, 5328, 613566756, 0, 5328, 613566756, 0, 5328, 613566756, 0, 5328, 613566756, 0, 5328, 613566756, 0, 5328, 613566756, 0, 5328, 613566756, 0, 5332, 613566756, 0, 5332, 613566756, 0, 5332, 613566756, 0, 5332, 613566756, 0, 5332, 613566756, 0, 5332, 613566756, 0, 5332, 613566756, 0, 5332, 613566756, 0, 5332, 613566756, 0, 5332, 613566756, 0, 5344, 613566756, 0, 5344, 613566756, 0, 5344, 613566756, 0, 5344, 613566756, 0, 5344, 613566756, 0, 5344, 613566756, 0, 5344, 613566756, 0, 5344, 613566756, 0, 5344, 613566756, 0, 5344, 613566756, 0, 5348, 613566756, 0, 5348, 613566756, 0, 5348, 613566756, 0, 5348, 613566756, 0, 5348, 613566756, 0, 5348, 613566756, 0, 5348, 613566756, 0, 5348, 613566756, 0, 5348, 613566756, 0, 5348, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 5328, 613566756, 0, 5328, 613566756, 0, 5328, 613566756, 0, 5328, 613566756, 0, 5328, 613566756, 0, 5328, 613566756, 0, 5328, 613566756, 0, 5328, 613566756, 0, 5328, 613566756, 0, 5328, 613566756, 0, 5332, 613566756, 0, 5332, 613566756, 0, 5332, 613566756, 0, 5332, 613566756, 0, 5332, 613566756, 0, 5332, 613566756, 0, 5332, 613566756, 0, 5332, 613566756, 0, 5332, 613566756, 0, 5332, 613566756, 0, 5344, 613566756, 0, 5344, 613566756, 0, 5344, 613566756, 0, 5344, 613566756, 0, 5344, 613566756, 0, 5344, 613566756, 0, 5344, 613566756, 0, 5344, 613566756, 0, 5344, 613566756, 0, 5344, 613566756, 0, 5348, 613566756, 0, 5348, 613566756, 0, 5348, 613566756, 0, 5348, 613566756, 0, 5348, 613566756, 0, 5348, 613566756, 0, 5348, 613566756, 0, 5348, 
613566756, 0, 5348, 613566756, 0, 5348, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756372062382566028_178_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756372062382566028_178_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b3f8a15f --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756372062382566028_178_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,182 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18))) { + 
result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 282 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 5632, 16, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 
2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 576, 17, 0, 576, 17, 0, 5632, 16, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 7744, 2004318071, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0, 8192, 1048575, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756372063488898788_179_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756372063488898788_179_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..868fa986 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756372063488898788_179_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,477 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() < 15)) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 24)) { + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if 
((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 25))) { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 30))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 17)) { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 21))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((289 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((310 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((321 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((328 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = 
(result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (338 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (348 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (357 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((372 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2880, 1, 0, 3776, 1074004032, 0, 3776, 1074004032, 0, 3776, 1074004032, 0, 4224, 32776, 0, 4224, 32776, 0, 4800, 136347656, 0, 4800, 136347656, 0, 4800, 136347656, 0, 4800, 136347656, 0, 6720, 2449473536, 0, 6720, 2449473536, 0, 6720, 2449473536, 0, 10880, 
256, 0, 13056, 1048576, 0, 13696, 17, 0, 13696, 17, 0, 15376, 536870912, 0, 15392, 536870912, 0, 16848, 4, 0, 16864, 4, 0, 20560, 1140850688, 0, 20560, 1140850688, 0, 20576, 1140850688, 0, 20576, 1140850688, 0, 21008, 1145044992, 0, 21008, 1145044992, 0, 21008, 1145044992, 0, 21024, 1145044992, 0, 21024, 1145044992, 0, 21024, 1145044992, 0, 21632, 8, 0, 2880, 1, 0, 3776, 1074004032, 0, 3776, 1074004032, 0, 3776, 1074004032, 0, 4224, 32776, 0, 4224, 32776, 0, 4800, 136347656, 0, 4800, 136347656, 0, 4800, 136347656, 0, 4800, 136347656, 0, 6720, 2449473536, 0, 6720, 2449473536, 0, 6720, 2449473536, 0, 10880, 256, 0, 13056, 1048576, 0, 13696, 17, 0, 13696, 17, 0, 15376, 536870912, 0, 15392, 536870912, 0, 16848, 4, 0, 16864, 4, 0, 20560, 1140850688, 0, 20560, 1140850688, 0, 20576, 1140850688, 0, 20576, 1140850688, 0, 21008, 1145044992, 0, 21008, 1145044992, 0, 21008, 1145044992, 0, 21024, 1145044992, 0, 21024, 1145044992, 0, 21024, 1145044992, 0, 21632, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756372087690326491_180_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756372087690326491_180_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4187ff4a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756372087690326491_180_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,189 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 26))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 4))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 5))) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1342177281, 0, 1280, 1342177281, 0, 1280, 1342177281, 0, 4992, 1073741829, 0, 4992, 1073741829, 0, 4992, 1073741829, 0, 9920, 40, 0, 9920, 40, 0, 9936, 40, 0, 9936, 40, 0, 9952, 40, 0, 9952, 40, 0, 1280, 1342177281, 0, 1280, 1342177281, 0, 1280, 1342177281, 0, 4992, 1073741829, 0, 4992, 1073741829, 0, 4992, 1073741829, 0, 9920, 40, 0, 9920, 40, 0, 9936, 40, 0, 9936, 40, 0, 9952, 40, 0, 9952, 40, 0] + - Name: _wave_op_index + Format: 
UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756372088292912034_181_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756372088292912034_181_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..34a2c06e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756372088292912034_181_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,369 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 29))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 26))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + 
WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) 
| (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { 
+ result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((247 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 23))) { + if 
(((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((265 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((315 << 6) | (i6 << 4)) | (counter7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((334 << 6) | (i6 << 4)) | (counter7 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((343 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 1)) { + break; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (360 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i8 = 0; (i8 < 3); i8 = (i8 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((391 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 282 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [9664, 16384, 0, 11520, 3221225501, 0, 
11520, 3221225501, 0, 11520, 3221225501, 0, 11520, 3221225501, 0, 11520, 3221225501, 0, 11520, 3221225501, 0, 12160, 21, 0, 12160, 21, 0, 12160, 21, 0, 13776, 2315255816, 0, 13776, 2315255816, 0, 13776, 2315255816, 0, 13776, 2315255816, 0, 13792, 2315255816, 0, 13792, 2315255816, 0, 13792, 2315255816, 0, 13792, 2315255816, 0, 14464, 3741319169, 0, 14464, 3741319169, 0, 14464, 3741319169, 0, 14464, 3741319169, 0, 14464, 3741319169, 0, 14464, 3741319169, 0, 14464, 3741319169, 0, 14464, 3741319169, 0, 16976, 268435457, 0, 16976, 268435457, 0, 16992, 268435457, 0, 16992, 268435457, 0, 17008, 268435457, 0, 17008, 268435457, 0, 20164, 16384, 0, 20168, 16384, 0, 20180, 16384, 0, 20184, 16384, 0, 23040, 2290649224, 0, 23040, 2290649224, 0, 23040, 2290649224, 0, 23040, 2290649224, 0, 23040, 2290649224, 0, 23040, 2290649224, 0, 23040, 2290649224, 0, 23040, 2290649224, 0, 25024, 8, 0, 25040, 8, 0, 25056, 8, 0, 9664, 16384, 0, 11520, 3221225501, 0, 11520, 3221225501, 0, 11520, 3221225501, 0, 11520, 3221225501, 0, 11520, 3221225501, 0, 11520, 3221225501, 0, 12160, 21, 0, 12160, 21, 0, 12160, 21, 0, 13776, 2315255816, 0, 13776, 2315255816, 0, 13776, 2315255816, 0, 13776, 2315255816, 0, 13792, 2315255816, 0, 13792, 2315255816, 0, 13792, 2315255816, 0, 13792, 2315255816, 0, 14464, 3741319169, 0, 14464, 3741319169, 0, 14464, 3741319169, 0, 14464, 3741319169, 0, 14464, 3741319169, 0, 14464, 3741319169, 0, 14464, 3741319169, 0, 14464, 3741319169, 0, 16976, 268435457, 0, 16976, 268435457, 0, 16992, 268435457, 0, 16992, 268435457, 0, 17008, 268435457, 0, 17008, 268435457, 0, 20164, 16384, 0, 20168, 16384, 0, 20180, 16384, 0, 20184, 16384, 0, 23040, 2290649224, 0, 23040, 2290649224, 0, 23040, 2290649224, 0, 23040, 2290649224, 0, 23040, 2290649224, 0, 23040, 2290649224, 0, 23040, 2290649224, 0, 23040, 2290649224, 0, 25024, 8, 0, 25040, 8, 0, 25056, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756372126025233850_182_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756372126025233850_182_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e3226b9c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756372126025233850_182_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,296 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 25))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; 
+ } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 26))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((149 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((158 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 31))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((247 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2560, 536870912, 0, 3520, 570425346, 0, 3520, 570425346, 0, 3520, 570425346, 0, 5184, 544, 0, 5184, 544, 0, 5200, 544, 0, 5200, 544, 0, 5216, 544, 0, 5216, 544, 0, 5504, 1145324612, 0, 5504, 1145324612, 0, 5504, 1145324612, 0, 5504, 1145324612, 0, 5504, 1145324612, 0, 5504, 1145324612, 0, 5504, 1145324612, 0, 5504, 1145324612, 0, 5952, 559240, 0, 5952, 559240, 0, 5952, 559240, 0, 5952, 559240, 0, 5952, 559240, 0, 9556, 4096, 0, 9560, 4096, 0, 9572, 4096, 0, 9576, 4096, 0, 10132, 4096, 0, 10136, 4096, 0, 10148, 4096, 0, 10152, 4096, 0, 11328, 4194320, 0, 11328, 4194320, 0, 576, 17, 0, 576, 17, 0, 2560, 536870912, 0, 3520, 570425346, 0, 3520, 570425346, 0, 3520, 570425346, 0, 5184, 544, 0, 5184, 544, 0, 5200, 544, 0, 5200, 544, 0, 5216, 544, 0, 5216, 544, 0, 5504, 1145324612, 0, 5504, 1145324612, 0, 5504, 1145324612, 0, 5504, 1145324612, 0, 5504, 1145324612, 0, 5504, 1145324612, 0, 5504, 1145324612, 0, 5504, 1145324612, 0, 5952, 559240, 0, 5952, 559240, 0, 5952, 559240, 0, 5952, 559240, 0, 5952, 559240, 0, 9556, 4096, 0, 9560, 4096, 0, 9572, 4096, 0, 9576, 4096, 0, 10132, 4096, 0, 10136, 4096, 0, 10148, 4096, 0, 10152, 4096, 0, 11328, 4194320, 0, 11328, 4194320, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + 
Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373010058801148_187_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373010058801148_187_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..94c2e16e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373010058801148_187_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 576, 17, 0, 576, 17, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 
+ - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373060808299752_189_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373060808299752_189_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3281f80c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373060808299752_189_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,164 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 26))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | 
(counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4032, 1145324612, 0, 4032, 1145324612, 0, 4032, 1145324612, 0, 4032, 1145324612, 0, 4032, 1145324612, 0, 4032, 1145324612, 0, 4032, 1145324612, 0, 4032, 1145324612, 0, 5200, 2147483656, 0, 5200, 2147483656, 0, 5216, 2147483656, 0, 5216, 2147483656, 0, 5232, 2147483656, 0, 5232, 2147483656, 0, 6352, 2281701384, 0, 6352, 2281701384, 0, 6352, 2281701384, 0, 6368, 2281701384, 0, 6368, 2281701384, 0, 6368, 2281701384, 0, 6384, 2281701384, 0, 6384, 2281701384, 0, 6384, 2281701384, 0, 7056, 2147483648, 0, 7072, 2147483648, 0, 7088, 2147483648, 0, 7760, 2281701384, 0, 7760, 2281701384, 0, 7760, 2281701384, 0, 7776, 2281701384, 0, 7776, 2281701384, 0, 7776, 2281701384, 0, 7792, 2281701384, 0, 7792, 2281701384, 0, 7792, 2281701384, 0, 576, 17, 0, 576, 17, 0, 4032, 1145324612, 0, 4032, 1145324612, 0, 4032, 1145324612, 0, 4032, 1145324612, 0, 4032, 1145324612, 0, 4032, 1145324612, 0, 4032, 1145324612, 0, 4032, 1145324612, 0, 5200, 2147483656, 0, 5200, 2147483656, 0, 5216, 2147483656, 0, 5216, 2147483656, 0, 5232, 2147483656, 0, 5232, 2147483656, 0, 6352, 2281701384, 0, 6352, 2281701384, 0, 6352, 2281701384, 0, 6368, 2281701384, 0, 6368, 2281701384, 0, 6368, 2281701384, 0, 6384, 2281701384, 0, 6384, 2281701384, 0, 
6384, 2281701384, 0, 7056, 2147483648, 0, 7072, 2147483648, 0, 7088, 2147483648, 0, 7760, 2281701384, 0, 7760, 2281701384, 0, 7760, 2281701384, 0, 7776, 2281701384, 0, 7776, 2281701384, 0, 7776, 2281701384, 0, 7792, 2281701384, 0, 7792, 2281701384, 0, 7792, 2281701384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373064938904479_190_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373064938904479_190_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..576bda33 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373064938904479_190_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373065116147926_191_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373065116147926_191_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ca8368f2 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373065116147926_191_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,281 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 21))) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 21))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 27))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [960, 2047, 0, 960, 2047, 0, 960, 2047, 0, 960, 2047, 0, 960, 2047, 0, 960, 2047, 0, 960, 2047, 0, 960, 2047, 0, 960, 2047, 0, 960, 2047, 0, 960, 2047, 0, 1600, 17, 0, 1600, 17, 0, 3024, 2228224, 0, 3024, 2228224, 0, 3040, 2228224, 0, 3040, 2228224, 0, 6032, 536870912, 0, 6048, 536870912, 0, 7248, 139264, 0, 7248, 139264, 0, 7264, 139264, 0, 7264, 139264, 0, 9856, 1073741892, 0, 9856, 1073741892, 0, 9856, 1073741892, 0, 11392, 1073741892, 0, 11392, 1073741892, 0, 11392, 1073741892, 0, 12800, 1073741892, 0, 12800, 1073741892, 0, 12800, 1073741892, 0, 13504, 67108864, 0, 16192, 67108864, 0, 16640, 559240, 0, 16640, 559240, 0, 16640, 559240, 0, 16640, 559240, 0, 16640, 559240, 0, 960, 2047, 0, 960, 2047, 0, 960, 2047, 0, 960, 2047, 0, 960, 2047, 0, 960, 2047, 0, 960, 2047, 0, 960, 2047, 0, 960, 2047, 0, 960, 2047, 0, 960, 2047, 0, 1600, 17, 0, 1600, 17, 0, 3024, 2228224, 0, 3024, 2228224, 0, 3040, 2228224, 0, 3040, 2228224, 0, 6032, 536870912, 0, 6048, 536870912, 0, 7248, 139264, 0, 7248, 139264, 0, 7264, 139264, 0, 7264, 139264, 0, 9856, 1073741892, 0, 9856, 1073741892, 0, 9856, 1073741892, 0, 11392, 1073741892, 0, 11392, 1073741892, 0, 11392, 1073741892, 0, 12800, 1073741892, 0, 12800, 1073741892, 0, 12800, 1073741892, 
0, 13504, 67108864, 0, 16192, 67108864, 0, 16640, 559240, 0, 16640, 559240, 0, 16640, 559240, 0, 16640, 559240, 0, 16640, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373068789042948_192_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373068789042948_192_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e0ad8ad9 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373068789042948_192_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,313 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25))) { + if (((((WaveGetLaneIndex() == 5) 
|| (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((268 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((290 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((304 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((321 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 528 + - Name: expected_bit_patterns + Format: 
UInt32 + Stride: 4 + Data: [1472, 8, 0, 2176, 73, 0, 2176, 73, 0, 2176, 73, 0, 5312, 128, 0, 5328, 128, 0, 7040, 8192, 0, 7680, 16, 0, 9552, 536870948, 0, 9552, 536870948, 0, 9552, 536870948, 0, 9568, 536870948, 0, 9568, 536870948, 0, 9568, 536870948, 0, 10512, 613416960, 0, 10512, 613416960, 0, 10512, 613416960, 0, 10512, 613416960, 0, 10516, 613416960, 0, 10516, 613416960, 0, 10516, 613416960, 0, 10516, 613416960, 0, 10528, 613416960, 0, 10528, 613416960, 0, 10528, 613416960, 0, 10528, 613416960, 0, 10532, 613416960, 0, 10532, 613416960, 0, 10532, 613416960, 0, 10532, 613416960, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 12928, 17, 0, 12928, 17, 0, 13824, 1145324612, 0, 13824, 1145324612, 0, 13824, 1145324612, 0, 13824, 1145324612, 0, 13824, 1145324612, 0, 13824, 1145324612, 0, 13824, 1145324612, 0, 13824, 1145324612, 0, 1472, 8, 0, 2176, 73, 0, 2176, 73, 0, 2176, 73, 0, 5312, 128, 0, 5328, 128, 0, 7040, 8192, 0, 7680, 16, 0, 9552, 536870948, 0, 9552, 536870948, 0, 
9552, 536870948, 0, 9568, 536870948, 0, 9568, 536870948, 0, 9568, 536870948, 0, 10512, 613416960, 0, 10512, 613416960, 0, 10512, 613416960, 0, 10512, 613416960, 0, 10516, 613416960, 0, 10516, 613416960, 0, 10516, 613416960, 0, 10516, 613416960, 0, 10528, 613416960, 0, 10528, 613416960, 0, 10528, 613416960, 0, 10528, 613416960, 0, 10532, 613416960, 0, 10532, 613416960, 0, 10532, 613416960, 0, 10532, 613416960, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11728, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11744, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 11760, 1431655765, 0, 12928, 17, 0, 12928, 17, 0, 13824, 1145324612, 0, 13824, 1145324612, 0, 13824, 1145324612, 0, 13824, 1145324612, 0, 13824, 1145324612, 0, 13824, 1145324612, 0, 13824, 1145324612, 0, 13824, 1145324612, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: 
+ Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373203361344885_193_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373203361344885_193_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..43da001f --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373203361344885_193_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,230 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 17)) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((114 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 21))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (i2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 276 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3072, 2097152, 0, 2688, 2852126720, 0, 2688, 2852126720, 0, 2688, 2852126720, 0, 2688, 2852126720, 0, 2304, 131072, 0, 1920, 32768, 0, 3968, 17, 0, 3968, 17, 0, 13248, 1145324612, 0, 13248, 1145324612, 0, 13248, 1145324612, 0, 13248, 1145324612, 0, 13248, 1145324612, 0, 13248, 1145324612, 0, 13248, 1145324612, 0, 13248, 1145324612, 0, 13264, 1145324612, 0, 13264, 1145324612, 0, 13264, 1145324612, 0, 13264, 1145324612, 0, 13264, 1145324612, 0, 13264, 1145324612, 0, 13264, 1145324612, 0, 13264, 1145324612, 0, 13888, 559240, 0, 13888, 
559240, 0, 13888, 559240, 0, 13888, 559240, 0, 13888, 559240, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3328, 1431655765, 0, 3072, 2097152, 0, 2688, 2852126720, 0, 2688, 2852126720, 0, 2688, 2852126720, 0, 2688, 2852126720, 0, 2304, 131072, 0, 1920, 32768, 0, 3968, 17, 0, 3968, 17, 0, 13248, 1145324612, 0, 13248, 1145324612, 0, 13248, 1145324612, 0, 13248, 1145324612, 0, 13248, 1145324612, 0, 13248, 1145324612, 0, 13248, 1145324612, 0, 13248, 1145324612, 0, 13264, 1145324612, 0, 13264, 1145324612, 0, 13264, 1145324612, 0, 13264, 1145324612, 0, 13264, 1145324612, 0, 13264, 1145324612, 0, 13264, 1145324612, 0, 13264, 1145324612, 0, 13888, 559240, 0, 13888, 559240, 0, 13888, 559240, 0, 13888, 559240, 0, 13888, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373205049278297_194_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373205049278297_194_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..da9f6896 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373205049278297_194_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,215 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 20))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 23))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 19))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 23)) || 
(WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 28))) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [11584, 272696320, 0, 11584, 272696320, 0, 11584, 272696320, 0, 11584, 272696320, 0, 11904, 613566756, 0, 11904, 613566756, 0, 11904, 613566756, 0, 11904, 613566756, 0, 11904, 613566756, 0, 11904, 613566756, 0, 11904, 613566756, 0, 11904, 613566756, 0, 11904, 613566756, 0, 11904, 613566756, 0, 14848, 2097152, 0, 11584, 272696320, 0, 11584, 272696320, 0, 11584, 272696320, 0, 11584, 272696320, 0, 11904, 613566756, 0, 11904, 613566756, 0, 11904, 613566756, 0, 11904, 613566756, 0, 11904, 613566756, 0, 11904, 613566756, 0, 11904, 613566756, 0, 11904, 613566756, 0, 11904, 613566756, 0, 11904, 613566756, 0, 14848, 2097152, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373209620403562_196_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373209620403562_196_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d4468b53 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373209620403562_196_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,241 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 9))) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 
3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 25))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29))) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((202 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((217 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 372 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2964, 64, 0, 2968, 64, 0, 2980, 64, 0, 2984, 64, 0, 2996, 64, 0, 3000, 64, 0, 5376, 272696336, 0, 5376, 272696336, 0, 5376, 272696336, 0, 5376, 272696336, 0, 5376, 272696336, 0, 7552, 545392672, 0, 7552, 545392672, 0, 7552, 545392672, 0, 7552, 545392672, 0, 7552, 545392672, 0, 7568, 545392672, 0, 7568, 545392672, 0, 7568, 545392672, 0, 7568, 545392672, 0, 7568, 545392672, 0, 7584, 545392672, 0, 7584, 545392672, 0, 7584, 545392672, 0, 7584, 545392672, 0, 7584, 545392672, 0, 8384, 73, 0, 8384, 73, 0, 8384, 73, 0, 8960, 272696336, 0, 8960, 272696336, 0, 8960, 272696336, 0, 8960, 272696336, 0, 8960, 272696336, 0, 9280, 613566756, 0, 9280, 613566756, 0, 9280, 613566756, 0, 9280, 613566756, 0, 9280, 613566756, 0, 9280, 613566756, 0, 9280, 613566756, 0, 9280, 613566756, 0, 9280, 613566756, 0, 9280, 613566756, 0, 12928, 536870912, 0, 12932, 536870912, 0, 12944, 536870912, 0, 12948, 536870912, 0, 12960, 536870912, 0, 12964, 536870912, 0, 14848, 536870912, 0, 15552, 
4261412879, 0, 15552, 4261412879, 0, 15552, 4261412879, 0, 15552, 4261412879, 0, 15552, 4261412879, 0, 15552, 4261412879, 0, 15552, 4261412879, 0, 15552, 4261412879, 0, 15552, 4261412879, 0, 15552, 4261412879, 0, 15552, 4261412879, 0, 2964, 64, 0, 2968, 64, 0, 2980, 64, 0, 2984, 64, 0, 2996, 64, 0, 3000, 64, 0, 5376, 272696336, 0, 5376, 272696336, 0, 5376, 272696336, 0, 5376, 272696336, 0, 5376, 272696336, 0, 7552, 545392672, 0, 7552, 545392672, 0, 7552, 545392672, 0, 7552, 545392672, 0, 7552, 545392672, 0, 7568, 545392672, 0, 7568, 545392672, 0, 7568, 545392672, 0, 7568, 545392672, 0, 7568, 545392672, 0, 7584, 545392672, 0, 7584, 545392672, 0, 7584, 545392672, 0, 7584, 545392672, 0, 7584, 545392672, 0, 8384, 73, 0, 8384, 73, 0, 8384, 73, 0, 8960, 272696336, 0, 8960, 272696336, 0, 8960, 272696336, 0, 8960, 272696336, 0, 8960, 272696336, 0, 9280, 613566756, 0, 9280, 613566756, 0, 9280, 613566756, 0, 9280, 613566756, 0, 9280, 613566756, 0, 9280, 613566756, 0, 9280, 613566756, 0, 9280, 613566756, 0, 9280, 613566756, 0, 9280, 613566756, 0, 12928, 536870912, 0, 12932, 536870912, 0, 12944, 536870912, 0, 12948, 536870912, 0, 12960, 536870912, 0, 12964, 536870912, 0, 14848, 536870912, 0, 15552, 4261412879, 0, 15552, 4261412879, 0, 15552, 4261412879, 0, 15552, 4261412879, 0, 15552, 4261412879, 0, 15552, 4261412879, 0, 15552, 4261412879, 0, 15552, 4261412879, 0, 15552, 4261412879, 0, 15552, 4261412879, 0, 15552, 4261412879, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373226496753867_198_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373226496753867_198_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ab91356a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373226496753867_198_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,108 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1458 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 2, 0, 912, 2, 0, 928, 2, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 
3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3616, 2863311530, 0, 
3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 896, 
2, 0, 912, 2, 0, 928, 2, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1920, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1936, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 1952, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3584, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3588, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3592, 2863311530, 
0, 3592, 2863311530, 0, 3592, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3600, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3604, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3608, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3616, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3620, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 
3624, 2863311530, 0, 3624, 2863311530, 0, 3624, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4160, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4176, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0, 4192, 2863311530, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373483278435188_201_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373483278435188_201_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4ec2e44d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373483278435188_201_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,323 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28))) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 4))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 16)) || 
(WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 31))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((280 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (294 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 29)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (304 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((328 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((349 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((364 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((371 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (380 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 366 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [9472, 8389632, 0, 9472, 8389632, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 13184, 4194304, 0, 13200, 4194304, 0, 13216, 4194304, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15808, 85, 0, 15808, 85, 0, 15808, 85, 0, 15808, 85, 0, 17940, 4369, 0, 17940, 4369, 0, 17940, 4369, 0, 17940, 4369, 0, 17944, 4369, 0, 17944, 4369, 0, 17944, 4369, 0, 17944, 4369, 0, 17956, 4369, 0, 17956, 4369, 0, 17956, 4369, 0, 17956, 4369, 0, 17960, 4369, 0, 17960, 4369, 0, 17960, 4369, 0, 17960, 4369, 0, 18816, 1145324612, 0, 18816, 1145324612, 0, 18816, 1145324612, 0, 18816, 1145324612, 0, 18816, 1145324612, 0, 18816, 1145324612, 0, 18816, 1145324612, 0, 18816, 1145324612, 0, 9472, 8389632, 0, 9472, 8389632, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 10048, 2854922914, 0, 13184, 4194304, 0, 13200, 4194304, 
0, 13216, 4194304, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15168, 1163219285, 0, 15808, 85, 0, 15808, 85, 0, 15808, 85, 0, 15808, 85, 0, 17940, 4369, 0, 17940, 4369, 0, 17940, 4369, 0, 17940, 4369, 0, 17944, 4369, 0, 17944, 4369, 0, 17944, 4369, 0, 17944, 4369, 0, 17956, 4369, 0, 17956, 4369, 0, 17956, 4369, 0, 17956, 4369, 0, 17960, 4369, 0, 17960, 4369, 0, 17960, 4369, 0, 17960, 4369, 0, 18816, 1145324612, 0, 18816, 1145324612, 0, 18816, 1145324612, 0, 18816, 1145324612, 0, 18816, 1145324612, 0, 18816, 1145324612, 0, 18816, 1145324612, 0, 18816, 1145324612, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373491896126254_202_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373491896126254_202_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..713b9857 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373491896126254_202_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,204 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 168 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 5248, 73, 0, 5248, 73, 0, 5248, 73, 0, 5824, 272696336, 0, 5824, 272696336, 0, 5824, 272696336, 0, 5824, 272696336, 0, 5824, 272696336, 0, 6144, 613566756, 0, 6144, 613566756, 0, 6144, 613566756, 0, 6144, 613566756, 0, 6144, 613566756, 0, 6144, 613566756, 0, 6144, 613566756, 0, 6144, 613566756, 0, 6144, 613566756, 0, 6144, 613566756, 0, 576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 5248, 73, 0, 5248, 73, 0, 5248, 73, 0, 5824, 272696336, 0, 5824, 272696336, 0, 5824, 272696336, 0, 5824, 272696336, 0, 5824, 272696336, 0, 6144, 
613566756, 0, 6144, 613566756, 0, 6144, 613566756, 0, 6144, 613566756, 0, 6144, 613566756, 0, 6144, 613566756, 0, 6144, 613566756, 0, 6144, 613566756, 0, 6144, 613566756, 0, 6144, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373492453350498_203_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373492453350498_203_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..71d03d14 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373492453350498_203_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,250 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 22))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 
31)) || (WaveGetLaneIndex() == 20))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 21)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 4261412879, 
0, 1088, 4261412879, 0, 1088, 4261412879, 0, 1088, 4261412879, 0, 1088, 4261412879, 0, 1088, 4261412879, 0, 1088, 4261412879, 0, 1088, 4261412879, 0, 1088, 4261412879, 0, 1088, 4261412879, 0, 1088, 4261412879, 0, 4416, 2147483656, 0, 4416, 2147483656, 0, 10944, 64, 0, 10960, 64, 0, 10976, 64, 0, 12800, 17, 0, 12800, 17, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 14144, 559240, 0, 14144, 559240, 0, 14144, 559240, 0, 14144, 559240, 0, 14144, 559240, 0, 1088, 4261412879, 0, 1088, 4261412879, 0, 1088, 4261412879, 0, 1088, 4261412879, 0, 1088, 4261412879, 0, 1088, 4261412879, 0, 1088, 4261412879, 0, 1088, 4261412879, 0, 1088, 4261412879, 0, 1088, 4261412879, 0, 1088, 4261412879, 0, 4416, 2147483656, 0, 4416, 2147483656, 0, 10944, 64, 0, 10960, 64, 0, 10976, 64, 0, 12800, 17, 0, 12800, 17, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 13696, 1717986918, 0, 14144, 559240, 0, 14144, 559240, 0, 14144, 559240, 0, 14144, 559240, 0, 14144, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: 
+ Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373562471659377_205_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373562471659377_205_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5f56c495 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373562471659377_205_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,194 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + } + break; + } + case 2: { + uint counter1 = 0; + while 
((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((122 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 3008, 2181570690, 0, 3008, 2181570690, 0, 3008, 2181570690, 0, 3008, 2181570690, 0, 3008, 2181570690, 0, 3008, 2181570690, 0, 4800, 1040, 0, 4800, 1040, 0, 4816, 1040, 0, 4816, 1040, 0, 4832, 1040, 0, 4832, 1040, 0, 5376, 16, 0, 5392, 16, 0, 5408, 16, 0, 6736, 603979812, 0, 6736, 603979812, 0, 6736, 603979812, 0, 6736, 603979812, 0, 6752, 603979812, 0, 6752, 603979812, 0, 6752, 603979812, 0, 6752, 603979812, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 3008, 2181570690, 0, 3008, 2181570690, 0, 3008, 2181570690, 0, 3008, 2181570690, 0, 3008, 
2181570690, 0, 3008, 2181570690, 0, 4800, 1040, 0, 4800, 1040, 0, 4816, 1040, 0, 4816, 1040, 0, 4832, 1040, 0, 4832, 1040, 0, 5376, 16, 0, 5392, 16, 0, 5408, 16, 0, 6736, 603979812, 0, 6736, 603979812, 0, 6736, 603979812, 0, 6736, 603979812, 0, 6752, 603979812, 0, 6752, 603979812, 0, 6752, 603979812, 0, 6752, 603979812, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373565678561683_206_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373565678561683_206_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b7d10d5d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373565678561683_206_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,222 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 25))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 26))) { + result = (result + 
WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } else { + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 8832, 8323072, 0, 8832, 8323072, 0, 8832, 8323072, 0, 8832, 8323072, 0, 8832, 8323072, 0, 8832, 8323072, 0, 8832, 8323072, 0, 10048, 1118208, 0, 10048, 1118208, 0, 10048, 1118208, 0, 11008, 4456448, 0, 11008, 4456448, 0, 11024, 4456448, 0, 11024, 4456448, 0, 11904, 559104, 0, 11904, 559104, 0, 11904, 559104, 0, 1088, 
4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 1088, 4286579199, 0, 8832, 8323072, 0, 8832, 8323072, 0, 8832, 8323072, 0, 8832, 8323072, 0, 8832, 8323072, 0, 8832, 8323072, 0, 8832, 8323072, 0, 10048, 1118208, 0, 10048, 1118208, 0, 10048, 1118208, 0, 11008, 4456448, 0, 11008, 4456448, 0, 11024, 4456448, 0, 11024, 4456448, 0, 11904, 559104, 0, 11904, 559104, 0, 11904, 559104, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373567039644199_207_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373567039644199_207_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0be4ff4b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373567039644199_207_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,231 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((39 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 25)) { + result = (result + 
WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 31)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 28)) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((142 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((156 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((165 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((172 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() 
== 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((191 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 9088, 268435456, 0, 9092, 268435456, 0, 9104, 268435456, 0, 9108, 268435456, 0, 11008, 268435456, 0, 11012, 268435456, 0, 11024, 268435456, 0, 11028, 268435456, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 6848, 1431655765, 0, 9088, 268435456, 0, 9092, 268435456, 0, 9104, 268435456, 0, 9108, 268435456, 0, 11008, 268435456, 0, 11012, 268435456, 0, 11024, 268435456, 0, 11028, 
268435456, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373567758403798_208_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373567758403798_208_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a73231f1 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373567758403798_208_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,134 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 20)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 294 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2944, 1090785345, 0, 2944, 1090785345, 0, 2944, 1090785345, 
0, 2944, 1090785345, 0, 2944, 1090785345, 0, 2944, 1090785345, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 2944, 1090785345, 0, 2944, 1090785345, 0, 2944, 1090785345, 0, 2944, 1090785345, 0, 2944, 1090785345, 0, 2944, 1090785345, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3520, 1363481681, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0, 3840, 4294967295, 0] + 
- Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373568289369970_209_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373568289369970_209_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9a014cc7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373568289369970_209_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) 
{ + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373568539415822_210_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373568539415822_210_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9ff18b38 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373568539415822_210_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,237 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 24))) { + if (((WaveGetLaneIndex() & 1) == 0)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + 
if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } else { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 174 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [10048, 31, 0, 10048, 31, 0, 10048, 31, 0, 10048, 31, 0, 10048, 31, 0, 9664, 268435712, 0, 9664, 268435712, 0, 9408, 8396928, 0, 9408, 8396928, 0, 9408, 8396928, 0, 9024, 16781312, 0, 9024, 16781312, 0, 8640, 537919488, 0, 8640, 537919488, 0, 10688, 17, 0, 10688, 17, 0, 11584, 1145324612, 0, 11584, 1145324612, 0, 11584, 1145324612, 0, 11584, 1145324612, 0, 11584, 1145324612, 0, 11584, 1145324612, 0, 11584, 1145324612, 0, 11584, 1145324612, 0, 12032, 559240, 0, 12032, 559240, 0, 12032, 559240, 0, 12032, 559240, 0, 12032, 559240, 0, 10048, 31, 0, 10048, 31, 0, 10048, 31, 0, 10048, 31, 0, 10048, 31, 0, 9664, 268435712, 0, 9664, 268435712, 0, 9408, 8396928, 0, 9408, 8396928, 0, 9408, 8396928, 0, 9024, 16781312, 0, 9024, 16781312, 0, 8640, 537919488, 0, 8640, 537919488, 0, 10688, 17, 0, 10688, 17, 0, 11584, 1145324612, 0, 11584, 
1145324612, 0, 11584, 1145324612, 0, 11584, 1145324612, 0, 11584, 1145324612, 0, 11584, 1145324612, 0, 11584, 1145324612, 0, 11584, 1145324612, 0, 12032, 559240, 0, 12032, 559240, 0, 12032, 559240, 0, 12032, 559240, 0, 12032, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373571300252303_211_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373571300252303_211_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..56aafecc --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373571300252303_211_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 13) || 
(WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 264 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1744, 1056768, 0, 1744, 1056768, 0, 1760, 1056768, 0, 1760, 1056768, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 
0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 3344, 4194368, 0, 3344, 4194368, 0, 3360, 4194368, 0, 3360, 4194368, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1744, 1056768, 0, 1744, 1056768, 0, 1760, 1056768, 0, 1760, 1056768, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2640, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 2656, 2863311530, 0, 3344, 4194368, 0, 3344, 4194368, 0, 3360, 4194368, 0, 3360, 4194368, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373573375093063_212_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373573375093063_212_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..27637256 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373573375093063_212_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,187 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 25))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 31))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 4026531843, 0, 1216, 4026531843, 0, 1216, 4026531843, 0, 1216, 4026531843, 0, 1216, 4026531843, 0, 1216, 4026531843, 0, 5264, 67108864, 0, 5280, 67108864, 0, 5296, 67108864, 0, 8896, 4473924, 0, 8896, 4473924, 0, 8896, 4473924, 0, 8896, 4473924, 0, 8896, 4473924, 0, 8896, 4473924, 0, 10368, 2048, 0, 10384, 2048, 0, 11264, 60, 0, 11264, 60, 0, 11264, 60, 0, 11264, 60, 0, 1216, 4026531843, 0, 1216, 4026531843, 0, 1216, 4026531843, 0, 1216, 4026531843, 0, 1216, 4026531843, 0, 
1216, 4026531843, 0, 5264, 67108864, 0, 5280, 67108864, 0, 5296, 67108864, 0, 8896, 4473924, 0, 8896, 4473924, 0, 8896, 4473924, 0, 8896, 4473924, 0, 8896, 4473924, 0, 8896, 4473924, 0, 10368, 2048, 0, 10384, 2048, 0, 11264, 60, 0, 11264, 60, 0, 11264, 60, 0, 11264, 60, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373582597438475_214_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373582597438475_214_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8c4757db --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373582597438475_214_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,322 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 21))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26))) { + result = (result 
+ WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 22)) { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 13))) { + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - 
Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6480, 536871424, 0, 6480, 536871424, 0, 6496, 536871424, 0, 6496, 536871424, 0, 11456, 559240, 0, 11456, 559240, 0, 11456, 559240, 0, 11456, 559240, 0, 11456, 559240, 0, 12096, 85, 0, 12096, 85, 0, 12096, 85, 0, 12096, 85, 0, 12864, 2852126720, 0, 12864, 2852126720, 0, 12864, 2852126720, 0, 12864, 2852126720, 0, 15296, 10922, 0, 15296, 10922, 0, 15296, 10922, 0, 15296, 10922, 0, 15296, 10922, 0, 15296, 10922, 0, 15296, 10922, 0, 17664, 2621440, 0, 17664, 2621440, 0, 6480, 536871424, 0, 6480, 536871424, 0, 6496, 536871424, 0, 6496, 536871424, 0, 11456, 559240, 0, 11456, 559240, 0, 11456, 559240, 0, 11456, 559240, 0, 11456, 559240, 0, 12096, 85, 0, 12096, 85, 0, 12096, 85, 0, 12096, 85, 0, 12864, 2852126720, 0, 12864, 2852126720, 0, 12864, 2852126720, 0, 12864, 2852126720, 0, 15296, 10922, 0, 15296, 10922, 0, 15296, 10922, 0, 15296, 10922, 0, 15296, 10922, 0, 15296, 10922, 0, 15296, 10922, 0, 17664, 2621440, 0, 17664, 2621440, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373586647462771_215_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373586647462771_215_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..56ba9961 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373586647462771_215_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,266 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 23))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + default: { + result = (result + 
WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25))) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 30))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 31))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((292 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((303 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (337 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 10688, 545259520, 0, 10688, 545259520, 0, 13568, 8389120, 0, 13568, 8389120, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 10688, 545259520, 0, 10688, 545259520, 0, 13568, 8389120, 0, 13568, 8389120, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373587281383926_216_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373587281383926_216_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cef08656 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373587281383926_216_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,275 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + break; + } + case 
2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 22)) { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 9)) { + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1728, 2453667842, 0, 1728, 
2453667842, 0, 1728, 2453667842, 0, 1728, 2453667842, 0, 1728, 2453667842, 0, 2688, 2, 0, 2704, 2, 0, 3200, 613566756, 0, 3200, 613566756, 0, 3200, 613566756, 0, 3200, 613566756, 0, 3200, 613566756, 0, 3200, 613566756, 0, 3200, 613566756, 0, 3200, 613566756, 0, 3200, 613566756, 0, 3200, 613566756, 0, 9216, 67436560, 0, 9216, 67436560, 0, 9216, 67436560, 0, 9216, 67436560, 0, 9856, 7, 0, 9856, 7, 0, 9856, 7, 0, 13440, 2155872769, 0, 13440, 2155872769, 0, 13440, 2155872769, 0, 13440, 2155872769, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1728, 2453667842, 0, 1728, 2453667842, 0, 1728, 2453667842, 0, 1728, 2453667842, 0, 1728, 2453667842, 0, 2688, 2, 0, 2704, 2, 0, 3200, 613566756, 0, 3200, 613566756, 0, 3200, 613566756, 0, 3200, 613566756, 0, 3200, 613566756, 0, 3200, 613566756, 0, 3200, 613566756, 0, 3200, 613566756, 0, 3200, 613566756, 0, 3200, 613566756, 0, 9216, 67436560, 0, 9216, 67436560, 0, 9216, 67436560, 0, 9216, 67436560, 0, 9856, 7, 0, 9856, 7, 0, 9856, 7, 0, 13440, 2155872769, 0, 13440, 2155872769, 0, 13440, 2155872769, 0, 13440, 2155872769, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373603654350045_218_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373603654350045_218_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ddc91969 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373603654350045_218_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,244 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if 
(((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + 
result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 23)) { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if 
((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 1090785345, 0, 1216, 1090785345, 0, 1216, 1090785345, 0, 1216, 1090785345, 0, 1216, 1090785345, 0, 1216, 1090785345, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 8512, 545392672, 0, 8512, 545392672, 0, 8512, 545392672, 0, 8512, 545392672, 0, 8512, 545392672, 0, 11968, 1064962, 0, 11968, 1064962, 0, 11968, 1064962, 0, 11584, 3221225472, 0, 11584, 3221225472, 0, 11200, 1021, 0, 11200, 1021, 0, 11200, 1021, 0, 11200, 1021, 0, 11200, 1021, 0, 11200, 1021, 0, 11200, 1021, 0, 11200, 1021, 0, 11200, 1021, 0, 1216, 1090785345, 0, 1216, 1090785345, 0, 1216, 1090785345, 0, 1216, 1090785345, 0, 1216, 1090785345, 0, 1216, 1090785345, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 5824, 1363481681, 0, 8512, 
545392672, 0, 8512, 545392672, 0, 8512, 545392672, 0, 8512, 545392672, 0, 8512, 545392672, 0, 11968, 1064962, 0, 11968, 1064962, 0, 11968, 1064962, 0, 11584, 3221225472, 0, 11584, 3221225472, 0, 11200, 1021, 0, 11200, 1021, 0, 11200, 1021, 0, 11200, 1021, 0, 11200, 1021, 0, 11200, 1021, 0, 11200, 1021, 0, 11200, 1021, 0, 11200, 1021, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373606446580399_219_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373606446580399_219_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3a973aa2 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373606446580399_219_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,198 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 2))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 528 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 1430257665, 0, 1344, 1430257665, 0, 1344, 1430257665, 0, 1344, 1430257665, 0, 1344, 1430257665, 0, 1344, 1430257665, 0, 1360, 1430257665, 0, 1360, 1430257665, 0, 1360, 1430257665, 0, 1360, 1430257665, 0, 1360, 1430257665, 0, 1360, 1430257665, 0, 1376, 1430257665, 0, 1376, 1430257665, 0, 1376, 1430257665, 0, 1376, 1430257665, 0, 1376, 1430257665, 0, 1376, 1430257665, 0, 3520, 268435456, 0, 3536, 268435456, 0, 3552, 268435456, 0, 5568, 262144, 0, 5584, 262144, 0, 5600, 262144, 0, 6272, 1426063361, 0, 6272, 1426063361, 0, 6272, 1426063361, 0, 6272, 1426063361, 0, 6272, 1426063361, 0, 6288, 1426063361, 0, 6288, 1426063361, 0, 6288, 1426063361, 0, 6288, 1426063361, 0, 6288, 1426063361, 0, 6304, 1426063361, 0, 6304, 1426063361, 0, 6304, 1426063361, 0, 6304, 1426063361, 0, 6304, 1426063361, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 
8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 10560, 4, 0, 1344, 1430257665, 0, 1344, 1430257665, 0, 1344, 1430257665, 0, 1344, 1430257665, 0, 1344, 1430257665, 0, 1344, 1430257665, 0, 1360, 1430257665, 0, 1360, 1430257665, 0, 1360, 1430257665, 0, 1360, 1430257665, 0, 1360, 1430257665, 0, 1360, 1430257665, 0, 1376, 1430257665, 0, 1376, 1430257665, 0, 1376, 1430257665, 0, 1376, 1430257665, 0, 1376, 1430257665, 0, 1376, 1430257665, 0, 3520, 268435456, 0, 3536, 268435456, 0, 3552, 268435456, 0, 5568, 262144, 0, 5584, 262144, 0, 5600, 262144, 0, 6272, 1426063361, 0, 6272, 1426063361, 0, 6272, 1426063361, 0, 6272, 1426063361, 0, 6272, 1426063361, 0, 6288, 1426063361, 0, 6288, 1426063361, 0, 6288, 1426063361, 0, 6288, 1426063361, 0, 6288, 1426063361, 0, 6304, 1426063361, 0, 6304, 1426063361, 0, 6304, 1426063361, 0, 6304, 1426063361, 0, 6304, 1426063361, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8400, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 
1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8416, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 8432, 1431655765, 0, 10560, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373624395063604_220_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373624395063604_220_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..946cd5fc --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373624395063604_220_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,233 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 12))) { + result = 
(result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 29))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 17)) { + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 276 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4928, 4096, 0, 4944, 4096, 0, 4960, 4096, 0, 7104, 8, 0, 7120, 8, 0, 7136, 8, 0, 8576, 9, 0, 8576, 9, 0, 8592, 9, 0, 8592, 9, 0, 8608, 9, 0, 8608, 9, 0, 10048, 2415919104, 0, 10048, 2415919104, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 
4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 4928, 4096, 0, 4944, 4096, 0, 4960, 4096, 0, 7104, 8, 0, 7120, 8, 0, 7136, 8, 0, 8576, 9, 0, 8576, 9, 0, 8592, 9, 0, 8592, 9, 0, 8608, 9, 0, 8608, 9, 0, 10048, 2415919104, 0, 10048, 2415919104, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0, 11456, 4294967295, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373626777600977_221_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373626777600977_221_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d11d758a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373626777600977_221_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,247 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 2))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 31))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 11))) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 264 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2432, 262144, 0, 5056, 4, 0, 5760, 1409286145, 0, 5760, 1409286145, 0, 5760, 1409286145, 0, 5760, 1409286145, 0, 8512, 1, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 
1431655765, 0, 9536, 1431655765, 0, 10112, 131072, 0, 11024, 2818572288, 0, 11024, 2818572288, 0, 11024, 2818572288, 0, 11040, 2818572288, 0, 11040, 2818572288, 0, 11040, 2818572288, 0, 11056, 2818572288, 0, 11056, 2818572288, 0, 11056, 2818572288, 0, 14400, 32768, 0, 15040, 85, 0, 15040, 85, 0, 15040, 85, 0, 15040, 85, 0, 15680, 8, 0, 16576, 545392672, 0, 16576, 545392672, 0, 16576, 545392672, 0, 16576, 545392672, 0, 16576, 545392672, 0, 2432, 262144, 0, 5056, 4, 0, 5760, 1409286145, 0, 5760, 1409286145, 0, 5760, 1409286145, 0, 5760, 1409286145, 0, 8512, 1, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 9536, 1431655765, 0, 10112, 131072, 0, 11024, 2818572288, 0, 11024, 2818572288, 0, 11024, 2818572288, 0, 11040, 2818572288, 0, 11040, 2818572288, 0, 11040, 2818572288, 0, 11056, 2818572288, 0, 11056, 2818572288, 0, 11056, 2818572288, 0, 14400, 32768, 0, 15040, 85, 0, 15040, 85, 0, 15040, 85, 0, 15040, 85, 0, 15680, 8, 0, 16576, 545392672, 0, 16576, 545392672, 0, 16576, 545392672, 0, 16576, 545392672, 0, 16576, 545392672, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373641985515638_223_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373641985515638_223_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6bb7f1ad --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373641985515638_223_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,241 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 5) || 
(WaveGetLaneIndex() >= 25))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 31))) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 136348168, 0, 1152, 136348168, 0, 1152, 136348168, 0, 1152, 136348168, 0, 1152, 136348168, 0, 9536, 1073741825, 0, 9536, 1073741825, 0, 10368, 1073741889, 0, 10368, 1073741889, 0, 10368, 1073741889, 0, 11584, 272696336, 0, 11584, 272696336, 0, 11584, 272696336, 0, 11584, 272696336, 0, 11584, 272696336, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 1152, 136348168, 0, 1152, 136348168, 0, 1152, 136348168, 0, 1152, 136348168, 0, 1152, 136348168, 0, 9536, 1073741825, 0, 9536, 1073741825, 0, 10368, 1073741889, 0, 10368, 1073741889, 0, 10368, 1073741889, 0, 11584, 272696336, 0, 11584, 272696336, 0, 11584, 272696336, 0, 11584, 272696336, 0, 11584, 272696336, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 
3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0, 12224, 3067833782, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373661116699427_225_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373661116699427_225_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..46636604 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373661116699427_225_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,222 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 13))) { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((94 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 30))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 6036, 536870912, 0, 6040, 536870912, 0, 6044, 536870912, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 
14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 6036, 536870912, 0, 6040, 536870912, 0, 6044, 536870912, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0, 14336, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373662241880971_226_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373662241880971_226_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dfbb1454 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373662241880971_226_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,203 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 23))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while 
((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 21))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 24)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (i2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 3758096511, 0, 1088, 3758096511, 0, 1088, 3758096511, 0, 1088, 3758096511, 0, 1088, 3758096511, 0, 1088, 3758096511, 0, 1088, 3758096511, 0, 1088, 3758096511, 0, 1088, 3758096511, 0, 1088, 3758096511, 0, 832, 536346624, 0, 832, 536346624, 0, 832, 536346624, 0, 832, 536346624, 0, 832, 536346624, 0, 832, 536346624, 0, 832, 536346624, 0, 832, 536346624, 0, 832, 536346624, 0, 832, 536346624, 0, 1088, 3758096511, 0, 1088, 3758096511, 0, 1088, 3758096511, 0, 1088, 3758096511, 0, 1088, 3758096511, 0, 1088, 3758096511, 0, 1088, 3758096511, 0, 1088, 3758096511, 0, 1088, 3758096511, 0, 1088, 3758096511, 0, 832, 536346624, 0, 832, 536346624, 0, 832, 536346624, 0, 832, 536346624, 0, 832, 536346624, 0, 832, 536346624, 0, 832, 536346624, 0, 832, 536346624, 0, 832, 536346624, 0, 832, 536346624, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373663437958684_227_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373663437958684_227_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c218360b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373663437958684_227_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,440 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 25))) { + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 23)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 22))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + } + if ((counter1 == 1)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 10))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((336 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((345 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((354 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((361 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (391 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((410 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (436 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (455 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((469 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 10))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((503 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((520 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + break; + } + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (527 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + 
+#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 318 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6416, 2097160, 0, 6416, 2097160, 0, 6432, 2097160, 0, 6432, 2097160, 0, 8064, 2097152, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 14528, 85, 0, 14528, 85, 0, 14528, 85, 0, 14528, 85, 0, 16208, 524320, 0, 16208, 524320, 0, 17360, 2818572290, 0, 17360, 2818572290, 0, 17360, 2818572290, 0, 17360, 2818572290, 0, 18176, 73, 0, 18176, 73, 0, 18176, 73, 0, 20224, 4194304, 0, 22080, 4195328, 0, 22080, 4195328, 0, 22096, 4195328, 0, 22096, 4195328, 0, 22112, 4195328, 0, 22112, 4195328, 0, 23104, 4194304, 0, 23120, 4194304, 0, 23136, 4194304, 0, 26240, 16, 0, 26256, 16, 0, 26272, 16, 0, 29120, 2147483648, 0, 6416, 2097160, 0, 6416, 2097160, 0, 6432, 2097160, 0, 6432, 2097160, 0, 8064, 2097152, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 13888, 3067833782, 0, 14528, 85, 0, 14528, 85, 0, 14528, 85, 0, 14528, 85, 0, 16208, 524320, 0, 16208, 524320, 0, 17360, 2818572290, 0, 17360, 2818572290, 
0, 17360, 2818572290, 0, 17360, 2818572290, 0, 18176, 73, 0, 18176, 73, 0, 18176, 73, 0, 20224, 4194304, 0, 22080, 4195328, 0, 22080, 4195328, 0, 22096, 4195328, 0, 22096, 4195328, 0, 22112, 4195328, 0, 22112, 4195328, 0, 23104, 4194304, 0, 23120, 4194304, 0, 23136, 4194304, 0, 26240, 16, 0, 26256, 16, 0, 26272, 16, 0, 29120, 2147483648, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373681236104648_228_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373681236104648_228_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4374da9b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373681236104648_228_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,144 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { 
+ switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = 
(result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3408, 2097152, 0, 3424, 2097152, 0, 5312, 8192, 0, 5328, 8192, 0, 5344, 8192, 0, 6224, 536870912, 0, 6240, 536870912, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3408, 2097152, 0, 3424, 2097152, 0, 5312, 8192, 0, 5328, 8192, 0, 5344, 8192, 0, 6224, 536870912, 0, 6240, 536870912, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373682341170779_229_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373682341170779_229_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..550b56a2 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373682341170779_229_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,104 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 29))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 13))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 19))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 4194304, 0, 5264, 8192, 0, 7360, 2097152, 0, 2112, 4194304, 0, 5264, 8192, 0, 7360, 2097152, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373682585232209_230_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373682585232209_230_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b4796b77 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373682585232209_230_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,137 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 11)) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3136, 128, 0, 3140, 128, 0, 3152, 128, 0, 3156, 128, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3136, 128, 0, 3140, 128, 0, 3152, 128, 0, 3156, 128, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373682899315105_231_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373682899315105_231_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5b8437d8 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373682899315105_231_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,193 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 17)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((41 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 
1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 16384, 0, 1040, 16384, 0, 3392, 16384, 0, 3408, 16384, 0, 4224, 73, 0, 4224, 73, 0, 4224, 73, 0, 4800, 272696336, 0, 4800, 272696336, 0, 4800, 272696336, 0, 4800, 272696336, 0, 4800, 272696336, 0, 5120, 613566756, 0, 5120, 613566756, 0, 5120, 613566756, 0, 5120, 613566756, 0, 5120, 613566756, 0, 5120, 613566756, 0, 5120, 613566756, 0, 5120, 613566756, 0, 5120, 613566756, 0, 5120, 613566756, 0, 5760, 17, 0, 5760, 17, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 7104, 559240, 0, 7104, 559240, 0, 7104, 559240, 0, 7104, 559240, 0, 7104, 559240, 0, 1024, 16384, 0, 1040, 16384, 0, 3392, 16384, 0, 3408, 
16384, 0, 4224, 73, 0, 4224, 73, 0, 4224, 73, 0, 4800, 272696336, 0, 4800, 272696336, 0, 4800, 272696336, 0, 4800, 272696336, 0, 4800, 272696336, 0, 5120, 613566756, 0, 5120, 613566756, 0, 5120, 613566756, 0, 5120, 613566756, 0, 5120, 613566756, 0, 5120, 613566756, 0, 5120, 613566756, 0, 5120, 613566756, 0, 5120, 613566756, 0, 5120, 613566756, 0, 5760, 17, 0, 5760, 17, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 7104, 559240, 0, 7104, 559240, 0, 7104, 559240, 0, 7104, 559240, 0, 7104, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373686101465554_232_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373686101465554_232_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..880b4bd7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373686101465554_232_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,163 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 300 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [960, 2, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 5696, 268439618, 0, 5696, 268439618, 0, 5696, 268439618, 0, 5696, 268439618, 0, 5440, 2147483709, 0, 5440, 2147483709, 0, 5440, 2147483709, 0, 5440, 2147483709, 0, 5440, 2147483709, 0, 5440, 2147483709, 0, 5056, 17334272, 0, 5056, 17334272, 0, 5056, 17334272, 0, 4800, 134218752, 0, 4800, 134218752, 0, 6336, 73, 0, 6336, 73, 0, 6336, 73, 0, 6912, 272696336, 0, 6912, 272696336, 0, 6912, 272696336, 0, 6912, 272696336, 0, 6912, 272696336, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 960, 2, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 704, 1431655765, 0, 5696, 268439618, 0, 5696, 268439618, 0, 5696, 268439618, 0, 5696, 
268439618, 0, 5440, 2147483709, 0, 5440, 2147483709, 0, 5440, 2147483709, 0, 5440, 2147483709, 0, 5440, 2147483709, 0, 5440, 2147483709, 0, 5056, 17334272, 0, 5056, 17334272, 0, 5056, 17334272, 0, 4800, 134218752, 0, 4800, 134218752, 0, 6336, 73, 0, 6336, 73, 0, 6336, 73, 0, 6912, 272696336, 0, 6912, 272696336, 0, 6912, 272696336, 0, 6912, 272696336, 0, 6912, 272696336, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0, 7232, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373687407304474_233_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373687407304474_233_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0960d318 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373687407304474_233_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 25)) { + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373687607089919_234_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373687607089919_234_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..52182367 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373687607089919_234_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,258 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) 
{ + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((190 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2756, 268435456, 0, 2760, 268435456, 0, 2772, 268435456, 0, 2776, 268435456, 0, 4160, 65536, 0, 4176, 65536, 0, 10368, 8, 0, 12480, 8390656, 0, 12480, 8390656, 0, 2756, 268435456, 0, 2760, 268435456, 0, 2772, 268435456, 0, 2776, 268435456, 0, 4160, 65536, 0, 4176, 65536, 0, 10368, 8, 0, 12480, 8390656, 0, 12480, 8390656, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373692885035405_235_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373692885035405_235_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..34a833f3 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373692885035405_235_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,330 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 25))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 27))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((209 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((218 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + 
uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((277 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (294 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 534 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 9152, 1048832, 0, 9152, 1048832, 0, 10048, 1145324612, 0, 10048, 1145324612, 0, 10048, 1145324612, 0, 10048, 1145324612, 0, 10048, 1145324612, 0, 10048, 1145324612, 0, 10048, 1145324612, 0, 10048, 1145324612, 0, 11008, 8, 0, 11024, 8, 0, 11040, 8, 0, 13956, 2290649224, 0, 13956, 2290649224, 0, 13956, 2290649224, 0, 13956, 2290649224, 0, 13956, 2290649224, 0, 13956, 2290649224, 0, 13956, 2290649224, 0, 13956, 2290649224, 0, 13960, 2290649224, 0, 13960, 2290649224, 0, 13960, 2290649224, 
0, 13960, 2290649224, 0, 13960, 2290649224, 0, 13960, 2290649224, 0, 13960, 2290649224, 0, 13960, 2290649224, 0, 13972, 2290649224, 0, 13972, 2290649224, 0, 13972, 2290649224, 0, 13972, 2290649224, 0, 13972, 2290649224, 0, 13972, 2290649224, 0, 13972, 2290649224, 0, 13972, 2290649224, 0, 13976, 2290649224, 0, 13976, 2290649224, 0, 13976, 2290649224, 0, 13976, 2290649224, 0, 13976, 2290649224, 0, 13976, 2290649224, 0, 13976, 2290649224, 0, 13976, 2290649224, 0, 13988, 2290649224, 0, 13988, 2290649224, 0, 13988, 2290649224, 0, 13988, 2290649224, 0, 13988, 2290649224, 0, 13988, 2290649224, 0, 13988, 2290649224, 0, 13988, 2290649224, 0, 13992, 2290649224, 0, 13992, 2290649224, 0, 13992, 2290649224, 0, 13992, 2290649224, 0, 13992, 2290649224, 0, 13992, 2290649224, 0, 13992, 2290649224, 0, 13992, 2290649224, 0, 15296, 85, 0, 15296, 85, 0, 15296, 85, 0, 15296, 85, 0, 17744, 134217729, 0, 17744, 134217729, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18816, 613566756, 0, 18816, 613566756, 0, 18816, 613566756, 0, 18816, 613566756, 0, 18816, 613566756, 0, 18816, 613566756, 0, 18816, 613566756, 0, 18816, 613566756, 0, 18816, 613566756, 0, 18816, 613566756, 0, 768, 1, 0, 9152, 1048832, 0, 9152, 1048832, 0, 10048, 1145324612, 0, 10048, 1145324612, 0, 10048, 1145324612, 0, 10048, 1145324612, 0, 10048, 1145324612, 0, 10048, 1145324612, 0, 10048, 1145324612, 0, 10048, 1145324612, 0, 11008, 8, 0, 11024, 8, 0, 11040, 8, 0, 13956, 2290649224, 0, 13956, 2290649224, 0, 13956, 2290649224, 0, 13956, 2290649224, 0, 13956, 2290649224, 0, 13956, 2290649224, 0, 13956, 2290649224, 0, 13956, 2290649224, 0, 13960, 2290649224, 0, 13960, 2290649224, 0, 13960, 2290649224, 0, 13960, 2290649224, 0, 13960, 2290649224, 0, 13960, 2290649224, 0, 13960, 2290649224, 0, 13960, 2290649224, 0, 13972, 2290649224, 
0, 13972, 2290649224, 0, 13972, 2290649224, 0, 13972, 2290649224, 0, 13972, 2290649224, 0, 13972, 2290649224, 0, 13972, 2290649224, 0, 13972, 2290649224, 0, 13976, 2290649224, 0, 13976, 2290649224, 0, 13976, 2290649224, 0, 13976, 2290649224, 0, 13976, 2290649224, 0, 13976, 2290649224, 0, 13976, 2290649224, 0, 13976, 2290649224, 0, 13988, 2290649224, 0, 13988, 2290649224, 0, 13988, 2290649224, 0, 13988, 2290649224, 0, 13988, 2290649224, 0, 13988, 2290649224, 0, 13988, 2290649224, 0, 13988, 2290649224, 0, 13992, 2290649224, 0, 13992, 2290649224, 0, 13992, 2290649224, 0, 13992, 2290649224, 0, 13992, 2290649224, 0, 13992, 2290649224, 0, 13992, 2290649224, 0, 13992, 2290649224, 0, 15296, 85, 0, 15296, 85, 0, 15296, 85, 0, 15296, 85, 0, 17744, 134217729, 0, 17744, 134217729, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18496, 1363481681, 0, 18816, 613566756, 0, 18816, 613566756, 0, 18816, 613566756, 0, 18816, 613566756, 0, 18816, 613566756, 0, 18816, 613566756, 0, 18816, 613566756, 0, 18816, 613566756, 0, 18816, 613566756, 0, 18816, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373720509456189_236_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373720509456189_236_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b936405e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373720509456189_236_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,102 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 135266880, 0, 2112, 135266880, 0, 2112, 135266880, 0, 2112, 135266880, 0, 1728, 8192, 0, 2752, 85, 0, 2752, 85, 0, 2752, 85, 0, 2752, 85, 0, 2112, 135266880, 0, 2112, 135266880, 0, 2112, 135266880, 0, 2112, 135266880, 0, 1728, 8192, 0, 2752, 85, 0, 2752, 85, 0, 2752, 85, 0, 2752, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373720793025753_237_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373720793025753_237_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4f700e83 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373720793025753_237_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,205 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((101 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((111 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((120 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((124 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((135 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 738 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1808, 1, 0, 1824, 1, 0, 1840, 1, 0, 5328, 31, 0, 5328, 31, 0, 5328, 31, 0, 5328, 31, 0, 5328, 31, 0, 5344, 31, 0, 5344, 31, 0, 5344, 31, 0, 5344, 31, 0, 5344, 31, 0, 5360, 31, 0, 5360, 31, 0, 5360, 31, 
0, 5360, 31, 0, 5360, 31, 0, 6484, 4261412865, 0, 6484, 4261412865, 0, 6484, 4261412865, 0, 6484, 4261412865, 0, 6484, 4261412865, 0, 6484, 4261412865, 0, 6484, 4261412865, 0, 6484, 4261412865, 0, 6500, 4261412865, 0, 6500, 4261412865, 0, 6500, 4261412865, 0, 6500, 4261412865, 0, 6500, 4261412865, 0, 6500, 4261412865, 0, 6500, 4261412865, 0, 6500, 4261412865, 0, 6516, 4261412865, 0, 6516, 4261412865, 0, 6516, 4261412865, 0, 6516, 4261412865, 0, 6516, 4261412865, 0, 6516, 4261412865, 0, 6516, 4261412865, 0, 6516, 4261412865, 0, 7124, 85, 0, 7124, 85, 0, 7124, 85, 0, 7124, 85, 0, 7140, 85, 0, 7140, 85, 0, 7140, 85, 0, 7140, 85, 0, 7156, 85, 0, 7156, 85, 0, 7156, 85, 0, 7156, 85, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 9296, 63, 0, 9296, 63, 0, 9296, 63, 0, 9296, 63, 0, 9296, 63, 0, 9296, 63, 0, 9312, 63, 0, 9312, 63, 0, 9312, 63, 0, 9312, 63, 0, 9312, 63, 0, 9312, 63, 0, 9328, 63, 0, 9328, 63, 0, 9328, 63, 0, 9328, 63, 0, 9328, 63, 0, 9328, 63, 0, 1808, 
1, 0, 1824, 1, 0, 1840, 1, 0, 5328, 31, 0, 5328, 31, 0, 5328, 31, 0, 5328, 31, 0, 5328, 31, 0, 5344, 31, 0, 5344, 31, 0, 5344, 31, 0, 5344, 31, 0, 5344, 31, 0, 5360, 31, 0, 5360, 31, 0, 5360, 31, 0, 5360, 31, 0, 5360, 31, 0, 6484, 4261412865, 0, 6484, 4261412865, 0, 6484, 4261412865, 0, 6484, 4261412865, 0, 6484, 4261412865, 0, 6484, 4261412865, 0, 6484, 4261412865, 0, 6484, 4261412865, 0, 6500, 4261412865, 0, 6500, 4261412865, 0, 6500, 4261412865, 0, 6500, 4261412865, 0, 6500, 4261412865, 0, 6500, 4261412865, 0, 6500, 4261412865, 0, 6500, 4261412865, 0, 6516, 4261412865, 0, 6516, 4261412865, 0, 6516, 4261412865, 0, 6516, 4261412865, 0, 6516, 4261412865, 0, 6516, 4261412865, 0, 6516, 4261412865, 0, 6516, 4261412865, 0, 7124, 85, 0, 7124, 85, 0, 7124, 85, 0, 7124, 85, 0, 7140, 85, 0, 7140, 85, 0, 7140, 85, 0, 7140, 85, 0, 7156, 85, 0, 7156, 85, 0, 7156, 85, 0, 7156, 85, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8660, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8676, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 8692, 4292870207, 0, 9296, 63, 0, 9296, 63, 0, 9296, 63, 0, 
9296, 63, 0, 9296, 63, 0, 9296, 63, 0, 9312, 63, 0, 9312, 63, 0, 9312, 63, 0, 9312, 63, 0, 9312, 63, 0, 9312, 63, 0, 9328, 63, 0, 9328, 63, 0, 9328, 63, 0, 9328, 63, 0, 9328, 63, 0, 9328, 63, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373867508344628_239_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373867508344628_239_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c0b2f679 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373867508344628_239_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,105 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - 
Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373867753401478_240_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373867753401478_240_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cd74d18a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373867753401478_240_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,101 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((26 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((33 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 240 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1680, 1431633920, 0, 1680, 1431633920, 0, 1680, 1431633920, 0, 1680, 1431633920, 0, 1680, 1431633920, 0, 1680, 1431633920, 0, 1680, 1431633920, 0, 1680, 1431633920, 0, 1684, 1431633920, 0, 1684, 1431633920, 0, 1684, 1431633920, 0, 1684, 1431633920, 0, 1684, 1431633920, 0, 1684, 1431633920, 0, 1684, 1431633920, 0, 1684, 1431633920, 0, 1696, 1431633920, 0, 1696, 1431633920, 0, 1696, 1431633920, 0, 1696, 1431633920, 0, 1696, 1431633920, 0, 1696, 1431633920, 0, 1696, 1431633920, 0, 1696, 1431633920, 0, 1700, 1431633920, 0, 1700, 1431633920, 0, 1700, 1431633920, 0, 1700, 1431633920, 0, 1700, 1431633920, 0, 1700, 1431633920, 0, 1700, 1431633920, 0, 1700, 1431633920, 0, 2128, 1342177280, 0, 2128, 1342177280, 0, 2132, 1342177280, 0, 2132, 1342177280, 0, 2144, 1342177280, 0, 2144, 1342177280, 0, 2148, 1342177280, 0, 2148, 1342177280, 0, 1680, 1431633920, 0, 1680, 1431633920, 0, 1680, 1431633920, 0, 1680, 1431633920, 0, 1680, 1431633920, 0, 1680, 1431633920, 0, 1680, 1431633920, 0, 1680, 1431633920, 0, 1684, 1431633920, 0, 1684, 1431633920, 0, 1684, 1431633920, 0, 1684, 1431633920, 0, 1684, 1431633920, 0, 1684, 1431633920, 0, 1684, 1431633920, 0, 1684, 1431633920, 0, 1696, 1431633920, 0, 1696, 1431633920, 0, 1696, 1431633920, 0, 1696, 1431633920, 0, 1696, 1431633920, 0, 1696, 1431633920, 0, 1696, 1431633920, 0, 1696, 1431633920, 0, 1700, 1431633920, 0, 1700, 1431633920, 0, 
1700, 1431633920, 0, 1700, 1431633920, 0, 1700, 1431633920, 0, 1700, 1431633920, 0, 1700, 1431633920, 0, 1700, 1431633920, 0, 2128, 1342177280, 0, 2128, 1342177280, 0, 2132, 1342177280, 0, 2132, 1342177280, 0, 2144, 1342177280, 0, 2144, 1342177280, 0, 2148, 1342177280, 0, 2148, 1342177280, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373870116592832_241_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373870116592832_241_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..25c708c7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373870116592832_241_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,298 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 10))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 10))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 22))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(9)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((303 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((313 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((322 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((333 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((340 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (347 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
} +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 1, 0, 1056, 1, 0, 1072, 1, 0, 4032, 286331153, 0, 4032, 286331153, 0, 4032, 286331153, 0, 4032, 286331153, 0, 4032, 286331153, 0, 4032, 286331153, 0, 4032, 286331153, 0, 4032, 286331153, 0, 17664, 268501008, 0, 17664, 268501008, 0, 17664, 268501008, 0, 17680, 268501008, 0, 17680, 268501008, 0, 17680, 268501008, 0, 17696, 268501008, 0, 17696, 268501008, 0, 17696, 268501008, 0, 19392, 4, 0, 19408, 4, 0, 21760, 1145044992, 0, 21760, 1145044992, 0, 21760, 1145044992, 0, 21776, 1145044992, 0, 21776, 1145044992, 0, 21776, 1145044992, 0, 22208, 559240, 0, 22208, 559240, 0, 22208, 559240, 0, 22208, 559240, 0, 22208, 559240, 0, 1040, 1, 0, 1056, 1, 0, 1072, 1, 0, 4032, 286331153, 0, 4032, 286331153, 0, 4032, 286331153, 0, 4032, 286331153, 0, 4032, 286331153, 0, 4032, 286331153, 0, 4032, 286331153, 0, 4032, 286331153, 0, 17664, 268501008, 0, 17664, 268501008, 0, 17664, 268501008, 0, 17680, 268501008, 0, 17680, 268501008, 0, 17680, 268501008, 0, 17696, 268501008, 0, 17696, 268501008, 0, 17696, 268501008, 0, 19392, 4, 0, 19408, 4, 0, 21760, 1145044992, 0, 21760, 1145044992, 0, 21760, 1145044992, 0, 21776, 1145044992, 0, 21776, 1145044992, 0, 21776, 1145044992, 0, 22208, 559240, 0, 22208, 559240, 0, 22208, 559240, 0, 22208, 559240, 0, 22208, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373881922045335_242_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373881922045335_242_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9f4ceb62 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373881922045335_242_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,174 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 23))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 28)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
} + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7104, 17, 0, 7104, 17, 0, 7680, 286331153, 0, 7680, 286331153, 0, 7680, 286331153, 0, 7680, 286331153, 0, 7680, 286331153, 0, 7680, 286331153, 0, 7680, 286331153, 0, 7680, 286331153, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8448, 559240, 0, 8448, 559240, 0, 8448, 559240, 0, 8448, 559240, 0, 8448, 559240, 0, 7104, 17, 0, 7104, 17, 0, 7680, 286331153, 0, 7680, 286331153, 0, 7680, 286331153, 0, 7680, 286331153, 0, 7680, 286331153, 0, 7680, 286331153, 0, 7680, 286331153, 0, 7680, 286331153, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 
0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8000, 2004318071, 0, 8448, 559240, 0, 8448, 559240, 0, 8448, 559240, 0, 8448, 559240, 0, 8448, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756373882596509656_243_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756373882596509656_243_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..81eaa4ab --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756373882596509656_243_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,214 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 21)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 30)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 2) || 
(WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 22))) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 3920, 32, 0, 3936, 32, 0, 5520, 8192, 0, 5536, 8192, 0, 5824, 1145324612, 0, 5824, 1145324612, 0, 5824, 1145324612, 0, 5824, 1145324612, 0, 5824, 1145324612, 0, 5824, 1145324612, 0, 5824, 1145324612, 0, 5824, 1145324612, 0, 9472, 2048, 0, 11904, 524288, 0, 576, 17, 0, 576, 17, 0, 3920, 32, 0, 3936, 32, 0, 5520, 8192, 0, 5536, 8192, 0, 5824, 1145324612, 0, 5824, 1145324612, 0, 5824, 1145324612, 0, 5824, 1145324612, 0, 5824, 1145324612, 0, 5824, 1145324612, 0, 
5824, 1145324612, 0, 5824, 1145324612, 0, 9472, 2048, 0, 11904, 524288, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374170610518663_245_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374170610518663_245_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..94f3e1ab --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374170610518663_245_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,219 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | 
(counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 26))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 20))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 29))) { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((205 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 6)) { + 
result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((217 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1616, 2181570690, 0, 1616, 2181570690, 0, 1616, 2181570690, 0, 1616, 2181570690, 0, 1616, 2181570690, 0, 1616, 2181570690, 0, 2320, 272696336, 0, 2320, 272696336, 0, 2320, 272696336, 0, 2320, 272696336, 0, 2320, 272696336, 0, 13120, 256, 0, 13124, 256, 0, 13136, 256, 0, 13140, 256, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1616, 2181570690, 0, 1616, 2181570690, 0, 1616, 2181570690, 0, 1616, 2181570690, 0, 1616, 2181570690, 0, 1616, 2181570690, 0, 2320, 272696336, 0, 2320, 272696336, 0, 2320, 272696336, 0, 2320, 272696336, 0, 2320, 272696336, 0, 13120, 256, 0, 13124, 256, 0, 13136, 256, 0, 13140, 256, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer 
+ DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374172752388311_246_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374172752388311_246_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..32bb51dd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374172752388311_246_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveSum(4)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2752, 524288, 0, 2496, 2147615240, 0, 2496, 2147615240, 0, 2496, 2147615240, 0, 2496, 2147615240, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2752, 524288, 0, 2496, 2147615240, 0, 2496, 2147615240, 0, 2496, 2147615240, 0, 2496, 2147615240, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0, 2112, 2146697216, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374173122585975_247_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374173122585975_247_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3b149c7e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374173122585975_247_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,274 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() 
== 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 0))) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 1))) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 16)) { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 300 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2816, 1073741832, 0, 2816, 1073741832, 0, 2832, 1073741832, 0, 2832, 1073741832, 0, 2848, 1073741832, 0, 2848, 1073741832, 0, 6464, 64, 0, 
6480, 64, 0, 6496, 64, 0, 11520, 272696336, 0, 11520, 272696336, 0, 11520, 272696336, 0, 11520, 272696336, 0, 11520, 272696336, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13888, 65, 0, 13888, 65, 0, 14464, 1040, 0, 14464, 1040, 0, 14784, 16644, 0, 14784, 16644, 0, 14784, 16644, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2816, 1073741832, 0, 2816, 1073741832, 0, 2832, 1073741832, 0, 2832, 1073741832, 0, 2848, 1073741832, 0, 2848, 1073741832, 0, 6464, 64, 0, 6480, 64, 0, 6496, 64, 0, 11520, 272696336, 0, 11520, 272696336, 0, 11520, 272696336, 0, 11520, 272696336, 0, 11520, 272696336, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13056, 32767, 0, 13888, 65, 0, 13888, 65, 0, 14464, 1040, 0, 14464, 1040, 0, 14784, 16644, 0, 14784, 16644, 0, 14784, 16644, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: 
_wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374179829750060_248_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374179829750060_248_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5727fbcf --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374179829750060_248_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,323 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((84 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 
1)) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (303 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 462 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 
0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 1280, 1073741824, 0, 1920, 65, 0, 1920, 65, 0, 2496, 272696336, 0, 2496, 272696336, 0, 2496, 272696336, 0, 2496, 272696336, 0, 2496, 272696336, 0, 4480, 16384, 0, 4496, 16384, 0, 4512, 16384, 0, 5380, 68157440, 0, 5380, 68157440, 0, 5384, 68157440, 0, 5384, 68157440, 0, 5396, 68157440, 0, 5396, 68157440, 0, 5400, 68157440, 0, 5400, 68157440, 0, 5412, 68157440, 0, 5412, 68157440, 0, 5416, 68157440, 0, 5416, 68157440, 0, 6848, 4, 0, 6864, 4, 0, 6880, 4, 0, 7296, 1431568384, 0, 7296, 1431568384, 0, 7296, 1431568384, 0, 7296, 1431568384, 0, 7296, 1431568384, 0, 7296, 1431568384, 0, 7296, 1431568384, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 9856, 4096, 0, 9600, 85, 0, 9600, 85, 0, 9600, 85, 0, 9600, 85, 0, 9344, 1073741824, 0, 19392, 8, 0, 20288, 545392672, 0, 20288, 545392672, 0, 20288, 545392672, 0, 20288, 545392672, 0, 20288, 545392672, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 1280, 1073741824, 0, 1920, 65, 0, 1920, 65, 0, 2496, 272696336, 0, 2496, 272696336, 0, 2496, 272696336, 0, 2496, 272696336, 0, 2496, 272696336, 0, 4480, 16384, 0, 4496, 16384, 0, 4512, 16384, 0, 5380, 68157440, 0, 5380, 68157440, 0, 5384, 68157440, 0, 5384, 68157440, 0, 5396, 68157440, 0, 5396, 68157440, 0, 
5400, 68157440, 0, 5400, 68157440, 0, 5412, 68157440, 0, 5412, 68157440, 0, 5416, 68157440, 0, 5416, 68157440, 0, 6848, 4, 0, 6864, 4, 0, 6880, 4, 0, 7296, 1431568384, 0, 7296, 1431568384, 0, 7296, 1431568384, 0, 7296, 1431568384, 0, 7296, 1431568384, 0, 7296, 1431568384, 0, 7296, 1431568384, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 10240, 2863311530, 0, 9856, 4096, 0, 9600, 85, 0, 9600, 85, 0, 9600, 85, 0, 9600, 85, 0, 9344, 1073741824, 0, 19392, 8, 0, 20288, 545392672, 0, 20288, 545392672, 0, 20288, 545392672, 0, 20288, 545392672, 0, 20288, 545392672, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374204302908411_249_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374204302908411_249_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8bafa577 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374204302908411_249_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,204 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 24))) { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 23)) { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 2: { + uint counter1 = 0; + 
while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2688, 1073741824, 0, 3264, 272696336, 0, 3264, 272696336, 0, 3264, 272696336, 0, 3264, 272696336, 0, 3264, 272696336, 0, 4992, 8, 0, 6208, 2147483778, 0, 6208, 2147483778, 0, 6208, 2147483778, 0, 6224, 2147483778, 0, 6224, 2147483778, 0, 6224, 2147483778, 0, 6912, 2147483778, 0, 6912, 2147483778, 0, 6912, 2147483778, 0, 6928, 2147483778, 0, 6928, 2147483778, 0, 6928, 2147483778, 0, 2688, 1073741824, 0, 3264, 272696336, 0, 3264, 272696336, 0, 3264, 272696336, 0, 3264, 272696336, 0, 3264, 272696336, 0, 4992, 8, 0, 6208, 2147483778, 0, 6208, 2147483778, 0, 6208, 2147483778, 0, 6224, 2147483778, 0, 6224, 
2147483778, 0, 6224, 2147483778, 0, 6912, 2147483778, 0, 6912, 2147483778, 0, 6912, 2147483778, 0, 6928, 2147483778, 0, 6928, 2147483778, 0, 6928, 2147483778, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374207195797981_250_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374207195797981_250_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..504edaaa --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374207195797981_250_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,179 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 0))) { + uint counter0 = 0; 
+ while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 4026531847, 0, 1088, 4026531847, 0, 1088, 4026531847, 0, 1088, 4026531847, 0, 1088, 4026531847, 0, 1088, 4026531847, 0, 1088, 4026531847, 0, 5968, 1, 0, 5984, 1, 0, 6000, 1, 0, 6656, 4026531847, 0, 6656, 4026531847, 0, 6656, 4026531847, 0, 6656, 4026531847, 0, 
6656, 4026531847, 0, 6656, 4026531847, 0, 6656, 4026531847, 0, 7488, 65, 0, 7488, 65, 0, 8704, 68174084, 0, 8704, 68174084, 0, 8704, 68174084, 0, 8704, 68174084, 0, 8704, 68174084, 0, 1088, 4026531847, 0, 1088, 4026531847, 0, 1088, 4026531847, 0, 1088, 4026531847, 0, 1088, 4026531847, 0, 1088, 4026531847, 0, 1088, 4026531847, 0, 5968, 1, 0, 5984, 1, 0, 6000, 1, 0, 6656, 4026531847, 0, 6656, 4026531847, 0, 6656, 4026531847, 0, 6656, 4026531847, 0, 6656, 4026531847, 0, 6656, 4026531847, 0, 6656, 4026531847, 0, 7488, 65, 0, 7488, 65, 0, 8704, 68174084, 0, 8704, 68174084, 0, 8704, 68174084, 0, 8704, 68174084, 0, 8704, 68174084, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374208894161458_251_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374208894161458_251_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c6207cf2 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374208894161458_251_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,181 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = ((58 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 23))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + 
} + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3728, 2147483648, 0, 3744, 2147483648, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 3728, 2147483648, 0, 3744, 2147483648, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0, 9344, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374312910295752_254_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374312910295752_254_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..25b87951 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374312910295752_254_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 
0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1920, 4227858432, 0, 1920, 4227858432, 0, 1920, 4227858432, 0, 1920, 4227858432, 0, 1920, 4227858432, 0, 1920, 4227858432, 0, 1664, 66060288, 0, 1664, 66060288, 0, 1664, 66060288, 0, 1664, 66060288, 0, 1664, 66060288, 0, 1664, 66060288, 0, 1280, 2052, 0, 1280, 2052, 0, 1920, 4227858432, 0, 1920, 4227858432, 0, 1920, 4227858432, 0, 1920, 4227858432, 0, 1920, 4227858432, 0, 1920, 4227858432, 0, 1664, 66060288, 0, 1664, 66060288, 0, 1664, 66060288, 0, 1664, 66060288, 0, 1664, 66060288, 0, 1664, 66060288, 0, 1280, 2052, 0, 1280, 2052, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374313157018428_255_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374313157018428_255_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ff0f0570 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374313157018428_255_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,261 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 26)) { + result = 
(result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 23)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 16)) { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 666 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 
2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 4672, 8, 0, 4688, 8, 0, 4704, 8, 0, 5312, 85, 0, 5312, 85, 0, 5312, 85, 0, 5312, 85, 0, 8000, 536879112, 0, 8000, 536879112, 0, 8000, 536879112, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 10304, 134217728, 0, 11200, 73, 0, 11200, 73, 0, 11200, 73, 0, 11776, 272696336, 0, 11776, 272696336, 0, 11776, 272696336, 0, 11776, 272696336, 0, 11776, 272696336, 0, 12096, 613566756, 0, 12096, 613566756, 0, 12096, 613566756, 0, 12096, 613566756, 0, 12096, 613566756, 0, 12096, 613566756, 0, 12096, 613566756, 0, 12096, 613566756, 0, 12096, 613566756, 0, 12096, 613566756, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 
3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 3744, 2863311530, 0, 4672, 8, 0, 4688, 8, 0, 4704, 8, 0, 5312, 85, 0, 5312, 85, 0, 5312, 85, 0, 5312, 85, 0, 8000, 536879112, 0, 8000, 536879112, 0, 8000, 536879112, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9616, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 9632, 2854922922, 0, 10304, 134217728, 0, 11200, 73, 0, 11200, 73, 0, 11200, 73, 0, 11776, 272696336, 0, 11776, 272696336, 0, 11776, 272696336, 0, 11776, 272696336, 0, 11776, 272696336, 0, 12096, 613566756, 0, 12096, 613566756, 0, 12096, 613566756, 0, 12096, 613566756, 0, 12096, 613566756, 0, 12096, 613566756, 0, 12096, 613566756, 0, 12096, 613566756, 0, 12096, 613566756, 0, 12096, 613566756, 0] + - Name: _wave_op_index 
+ Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374326369819224_256_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374326369819224_256_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c852b316 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374326369819224_256_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,281 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + 
result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 29)) { + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + } + case 3: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 174 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5136, 128, 0, 5152, 128, 0, 5760, 73, 0, 5760, 73, 0, 5760, 73, 0, 10896, 2147483648, 0, 10912, 2147483648, 0, 10928, 2147483648, 0, 11904, 2181570690, 0, 11904, 2181570690, 0, 11904, 2181570690, 0, 11904, 2181570690, 0, 11904, 2181570690, 0, 11904, 2181570690, 0, 13120, 272696336, 0, 13120, 272696336, 0, 13120, 272696336, 0, 13120, 272696336, 0, 13120, 272696336, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 5136, 128, 0, 5152, 128, 0, 5760, 73, 0, 5760, 73, 0, 5760, 73, 0, 10896, 2147483648, 0, 10912, 2147483648, 0, 10928, 2147483648, 0, 11904, 2181570690, 0, 11904, 2181570690, 0, 11904, 2181570690, 0, 11904, 2181570690, 0, 11904, 2181570690, 0, 11904, 2181570690, 0, 13120, 272696336, 0, 13120, 272696336, 0, 13120, 272696336, 0, 13120, 272696336, 0, 13120, 272696336, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + 
- Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374367360373604_258_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374367360373604_258_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..efaa5330 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374367360373604_258_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,524 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 27))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((76 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((85 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((90 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + result = 
(result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 25)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((217 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((235 << 6) | (i3 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 
9)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 6))) { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((329 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (353 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (362 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (398 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + 
case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (408 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (417 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (421 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (436 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (450 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((467 << 6) | (i5 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((476 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (487 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (498 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((515 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((524 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (528 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 312 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 4026531903, 0, 1088, 4026531903, 0, 1088, 4026531903, 0, 1088, 4026531903, 0, 1088, 4026531903, 0, 1088, 4026531903, 0, 1088, 4026531903, 0, 1088, 4026531903, 0, 1088, 4026531903, 0, 1088, 4026531903, 0, 1920, 3221225479, 0, 1920, 3221225479, 0, 1920, 3221225479, 0, 1920, 3221225479, 0, 1920, 3221225479, 0, 3792, 8, 0, 3808, 8, 0, 4884, 73, 0, 4884, 73, 0, 4884, 73, 0, 4888, 73, 0, 4888, 73, 0, 4888, 73, 0, 4900, 73, 0, 4900, 73, 0, 4900, 73, 0, 4904, 73, 0, 4904, 73, 0, 4904, 73, 0, 7760, 262144, 0, 7776, 262144, 0, 11600, 2454192128, 0, 11600, 2454192128, 0, 11600, 2454192128, 0, 11600, 2454192128, 0, 11600, 2454192128, 0, 11616, 2454192128, 0, 11616, 2454192128, 0, 11616, 2454192128, 0, 11616, 2454192128, 0, 11616, 2454192128, 0, 11632, 2454192128, 0, 11632, 2454192128, 0, 11632, 2454192128, 0, 11632, 2454192128, 0, 11632, 2454192128, 0, 27904, 16384, 0, 31872, 2147483656, 0, 31872, 2147483656, 0, 32960, 8388608, 0, 32976, 8388608, 0, 32992, 8388608, 0, 1088, 4026531903, 0, 1088, 4026531903, 0, 1088, 4026531903, 0, 1088, 4026531903, 0, 1088, 4026531903, 0, 1088, 4026531903, 0, 1088, 4026531903, 0, 1088, 4026531903, 0, 1088, 4026531903, 0, 1088, 4026531903, 0, 1920, 3221225479, 0, 1920, 3221225479, 0, 1920, 3221225479, 0, 1920, 3221225479, 0, 1920, 3221225479, 0, 3792, 8, 0, 3808, 8, 0, 4884, 73, 0, 4884, 73, 0, 4884, 73, 0, 4888, 73, 0, 4888, 73, 0, 4888, 73, 0, 4900, 73, 0, 4900, 73, 0, 4900, 73, 0, 4904, 73, 0, 4904, 73, 0, 4904, 73, 0, 7760, 262144, 0, 7776, 262144, 0, 11600, 2454192128, 0, 
11600, 2454192128, 0, 11600, 2454192128, 0, 11600, 2454192128, 0, 11600, 2454192128, 0, 11616, 2454192128, 0, 11616, 2454192128, 0, 11616, 2454192128, 0, 11616, 2454192128, 0, 11616, 2454192128, 0, 11632, 2454192128, 0, 11632, 2454192128, 0, 11632, 2454192128, 0, 11632, 2454192128, 0, 11632, 2454192128, 0, 27904, 16384, 0, 31872, 2147483656, 0, 31872, 2147483656, 0, 32960, 8388608, 0, 32976, 8388608, 0, 32992, 8388608, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374462450935104_259_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374462450935104_259_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c2e0328c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374462450935104_259_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,414 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 26))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 20)) { + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 31)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 15)) { + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 2: 
{ + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 30))) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 12)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((210 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((217 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((292 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 5))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((334 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 1)) { + continue; + } + } + if ((counter3 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (349 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (353 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3648, 2097664, 0, 3648, 2097664, 0, 3664, 2097664, 0, 3664, 2097664, 0, 4224, 268501008, 0, 4224, 268501008, 0, 4224, 268501008, 0, 8064, 1145324612, 0, 8064, 1145324612, 0, 8064, 1145324612, 0, 8064, 1145324612, 0, 8064, 1145324612, 0, 8064, 1145324612, 0, 8064, 1145324612, 0, 8064, 1145324612, 0, 9344, 8, 0, 9360, 8, 0, 9376, 8, 0, 13008, 128, 0, 13024, 128, 0, 13904, 2147483648, 0, 13920, 2147483648, 0, 14208, 8390656, 0, 14208, 8390656, 0, 15552, 73, 0, 15552, 73, 0, 15552, 73, 0, 16128, 272696336, 0, 16128, 272696336, 0, 16128, 272696336, 0, 16128, 272696336, 0, 16128, 272696336, 0, 3648, 2097664, 0, 3648, 2097664, 0, 3664, 2097664, 0, 3664, 2097664, 0, 4224, 268501008, 0, 4224, 268501008, 0, 4224, 268501008, 0, 8064, 1145324612, 0, 8064, 1145324612, 0, 8064, 1145324612, 0, 8064, 1145324612, 0, 8064, 1145324612, 0, 8064, 1145324612, 0, 8064, 1145324612, 0, 8064, 1145324612, 0, 9344, 8, 0, 9360, 8, 0, 9376, 8, 0, 13008, 128, 0, 13024, 128, 0, 13904, 2147483648, 0, 13920, 2147483648, 0, 14208, 8390656, 0, 14208, 8390656, 0, 15552, 73, 0, 15552, 73, 0, 15552, 73, 0, 16128, 272696336, 0, 16128, 272696336, 0, 16128, 272696336, 0, 16128, 272696336, 0, 16128, 272696336, 0] + - Name: _wave_op_index + Format: UInt32 + 
Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374532243812047_262_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374532243812047_262_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cb9d9d89 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374532243812047_262_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,379 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 22))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 
6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if 
((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 28))) { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((331 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 21)) || 
(WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (352 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (370 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (388 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((411 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (422 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || 
(WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (433 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 6656, 33554464, 0, 6656, 33554464, 0, 7936, 536870914, 0, 7936, 536870914, 0, 9536, 536870946, 0, 9536, 536870946, 0, 9536, 536870946, 0, 23680, 2281701384, 0, 23680, 2281701384, 0, 23680, 2281701384, 0, 24832, 2147483648, 0, 27008, 2147483648, 0, 27712, 2147483656, 0, 27712, 2147483656, 0, 576, 17, 0, 576, 17, 0, 6656, 33554464, 0, 6656, 33554464, 0, 7936, 536870914, 0, 7936, 536870914, 0, 9536, 536870946, 0, 9536, 536870946, 0, 9536, 536870946, 0, 23680, 2281701384, 0, 23680, 2281701384, 0, 23680, 2281701384, 0, 24832, 2147483648, 0, 27008, 2147483648, 0, 27712, 2147483656, 0, 27712, 2147483656, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374662317223852_264_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374662317223852_264_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5ae8b5e7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374662317223852_264_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,310 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14)) || 
(WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27))) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + for (uint i2 = 0; (i2 
< 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((147 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((202 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((217 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 31))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (308 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 468 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 268435456, 0, 1104, 268435456, 0, 3776, 81920, 0, 3776, 81920, 0, 3792, 81920, 0, 3792, 81920, 0, 4992, 262144, 0, 5008, 262144, 0, 5440, 262144, 0, 5456, 262144, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 
6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 18112, 2147491840, 0, 18112, 2147491840, 0, 20224, 7, 0, 20224, 7, 0, 20224, 7, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19712, 131072, 0, 1088, 268435456, 0, 1104, 268435456, 0, 3776, 81920, 0, 3776, 81920, 0, 3792, 81920, 0, 3792, 81920, 0, 4992, 262144, 0, 5008, 262144, 0, 5440, 262144, 0, 5456, 262144, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 6016, 1431655765, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 
2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 15616, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 17408, 2863311530, 0, 18112, 2147491840, 0, 18112, 2147491840, 0, 20224, 7, 0, 20224, 7, 0, 20224, 7, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19968, 1431655760, 0, 19712, 131072, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374669855247155_265_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374669855247155_265_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bbbeefc2 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374669855247155_265_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,120 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 23))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374690520478516_267_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374690520478516_267_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c1116325 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374690520478516_267_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,293 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 19))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 26))) 
{ + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 26))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((157 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((168 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((177 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((184 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() 
== 14)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 26))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 678 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 3328, 64, 0, 4736, 64, 0, 4752, 64, 0, 6144, 559240, 0, 6144, 559240, 0, 6144, 559240, 0, 6144, 559240, 0, 6144, 559240, 0, 8400, 71303424, 0, 8400, 71303424, 0, 8400, 71303424, 0, 8416, 71303424, 0, 8416, 71303424, 0, 8416, 71303424, 0, 10064, 1409286149, 0, 10064, 1409286149, 0, 10064, 1409286149, 0, 10064, 1409286149, 0, 10064, 1409286149, 0, 10068, 1409286149, 0, 10068, 1409286149, 0, 10068, 1409286149, 0, 10068, 1409286149, 0, 10068, 1409286149, 0, 10080, 1409286149, 0, 10080, 1409286149, 0, 10080, 1409286149, 0, 10080, 1409286149, 0, 10080, 1409286149, 0, 10084, 1409286149, 0, 10084, 1409286149, 0, 10084, 1409286149, 0, 10084, 1409286149, 0, 10084, 1409286149, 0, 10768, 1409286485, 0, 10768, 1409286485, 0, 10768, 1409286485, 0, 10768, 1409286485, 0, 10768, 1409286485, 0, 10768, 1409286485, 0, 10768, 1409286485, 0, 10768, 1409286485, 0, 10772, 1409286485, 0, 10772, 1409286485, 0, 10772, 1409286485, 0, 10772, 1409286485, 0, 10772, 1409286485, 0, 10772, 1409286485, 0, 10772, 1409286485, 0, 10772, 
1409286485, 0, 10784, 1409286485, 0, 10784, 1409286485, 0, 10784, 1409286485, 0, 10784, 1409286485, 0, 10784, 1409286485, 0, 10784, 1409286485, 0, 10784, 1409286485, 0, 10784, 1409286485, 0, 10788, 1409286485, 0, 10788, 1409286485, 0, 10788, 1409286485, 0, 10788, 1409286485, 0, 10788, 1409286485, 0, 10788, 1409286485, 0, 10788, 1409286485, 0, 10788, 1409286485, 0, 11792, 1431633920, 0, 11792, 1431633920, 0, 11792, 1431633920, 0, 11792, 1431633920, 0, 11792, 1431633920, 0, 11792, 1431633920, 0, 11792, 1431633920, 0, 11792, 1431633920, 0, 11796, 1431633920, 0, 11796, 1431633920, 0, 11796, 1431633920, 0, 11796, 1431633920, 0, 11796, 1431633920, 0, 11796, 1431633920, 0, 11796, 1431633920, 0, 11796, 1431633920, 0, 11808, 1431633920, 0, 11808, 1431633920, 0, 11808, 1431633920, 0, 11808, 1431633920, 0, 11808, 1431633920, 0, 11808, 1431633920, 0, 11808, 1431633920, 0, 11808, 1431633920, 0, 11812, 1431633920, 0, 11812, 1431633920, 0, 11812, 1431633920, 0, 11812, 1431633920, 0, 11812, 1431633920, 0, 11812, 1431633920, 0, 11812, 1431633920, 0, 11812, 1431633920, 0, 12944, 16384, 0, 12960, 16384, 0, 14144, 17, 0, 14144, 17, 0, 15040, 1145324612, 0, 15040, 1145324612, 0, 15040, 1145324612, 0, 15040, 1145324612, 0, 15040, 1145324612, 0, 15040, 1145324612, 0, 15040, 1145324612, 0, 15040, 1145324612, 0, 18176, 8, 0, 576, 17, 0, 576, 17, 0, 3328, 64, 0, 4736, 64, 0, 4752, 64, 0, 6144, 559240, 0, 6144, 559240, 0, 6144, 559240, 0, 6144, 559240, 0, 6144, 559240, 0, 8400, 71303424, 0, 8400, 71303424, 0, 8400, 71303424, 0, 8416, 71303424, 0, 8416, 71303424, 0, 8416, 71303424, 0, 10064, 1409286149, 0, 10064, 1409286149, 0, 10064, 1409286149, 0, 10064, 1409286149, 0, 10064, 1409286149, 0, 10068, 1409286149, 0, 10068, 1409286149, 0, 10068, 1409286149, 0, 10068, 1409286149, 0, 10068, 1409286149, 0, 10080, 1409286149, 0, 10080, 1409286149, 0, 10080, 1409286149, 0, 10080, 1409286149, 0, 10080, 1409286149, 0, 10084, 1409286149, 0, 10084, 1409286149, 0, 10084, 1409286149, 0, 10084, 1409286149, 
0, 10084, 1409286149, 0, 10768, 1409286485, 0, 10768, 1409286485, 0, 10768, 1409286485, 0, 10768, 1409286485, 0, 10768, 1409286485, 0, 10768, 1409286485, 0, 10768, 1409286485, 0, 10768, 1409286485, 0, 10772, 1409286485, 0, 10772, 1409286485, 0, 10772, 1409286485, 0, 10772, 1409286485, 0, 10772, 1409286485, 0, 10772, 1409286485, 0, 10772, 1409286485, 0, 10772, 1409286485, 0, 10784, 1409286485, 0, 10784, 1409286485, 0, 10784, 1409286485, 0, 10784, 1409286485, 0, 10784, 1409286485, 0, 10784, 1409286485, 0, 10784, 1409286485, 0, 10784, 1409286485, 0, 10788, 1409286485, 0, 10788, 1409286485, 0, 10788, 1409286485, 0, 10788, 1409286485, 0, 10788, 1409286485, 0, 10788, 1409286485, 0, 10788, 1409286485, 0, 10788, 1409286485, 0, 11792, 1431633920, 0, 11792, 1431633920, 0, 11792, 1431633920, 0, 11792, 1431633920, 0, 11792, 1431633920, 0, 11792, 1431633920, 0, 11792, 1431633920, 0, 11792, 1431633920, 0, 11796, 1431633920, 0, 11796, 1431633920, 0, 11796, 1431633920, 0, 11796, 1431633920, 0, 11796, 1431633920, 0, 11796, 1431633920, 0, 11796, 1431633920, 0, 11796, 1431633920, 0, 11808, 1431633920, 0, 11808, 1431633920, 0, 11808, 1431633920, 0, 11808, 1431633920, 0, 11808, 1431633920, 0, 11808, 1431633920, 0, 11808, 1431633920, 0, 11808, 1431633920, 0, 11812, 1431633920, 0, 11812, 1431633920, 0, 11812, 1431633920, 0, 11812, 1431633920, 0, 11812, 1431633920, 0, 11812, 1431633920, 0, 11812, 1431633920, 0, 11812, 1431633920, 0, 12944, 16384, 0, 12960, 16384, 0, 14144, 17, 0, 14144, 17, 0, 15040, 1145324612, 0, 15040, 1145324612, 0, 15040, 1145324612, 0, 15040, 1145324612, 0, 15040, 1145324612, 0, 15040, 1145324612, 0, 15040, 1145324612, 0, 15040, 1145324612, 0, 18176, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374751733136007_269_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374751733136007_269_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..078aeb43 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374751733136007_269_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,246 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 
2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 30))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 28)) { + if ((WaveGetLaneIndex() == 10)) { + 
result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2048, 85, 0, 2048, 85, 0, 2048, 85, 0, 2048, 85, 0, 7760, 32, 0, 7776, 32, 0, 9472, 2147483690, 0, 9472, 2147483690, 0, 9472, 2147483690, 0, 9472, 2147483690, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2048, 85, 0, 2048, 85, 0, 2048, 85, 0, 2048, 85, 0, 7760, 32, 0, 7776, 32, 0, 9472, 2147483690, 0, 9472, 2147483690, 0, 9472, 2147483690, 0, 9472, 2147483690, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374755369182440_271_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374755369182440_271_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2f67c522 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374755369182440_271_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,273 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 28)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = 
(counter0 + 1); + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 16))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 22))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 1)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 23)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + 
Stride: 4 + Fill: 0 + Size: 288 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 134218753, 0, 1856, 134218753, 0, 1856, 134218753, 0, 1600, 1056776, 0, 1600, 1056776, 0, 1600, 1056776, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 6720, 65540, 0, 6720, 65540, 0, 7872, 2147483652, 0, 7872, 2147483652, 0, 9088, 65536, 0, 15488, 272696336, 0, 15488, 272696336, 0, 15488, 272696336, 0, 15488, 272696336, 0, 15488, 272696336, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 1856, 134218753, 0, 1856, 134218753, 0, 1856, 134218753, 0, 1600, 1056776, 0, 1600, 1056776, 0, 1600, 1056776, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 4800, 1363481681, 0, 6720, 65540, 0, 6720, 65540, 0, 7872, 2147483652, 0, 7872, 2147483652, 0, 9088, 65536, 0, 15488, 272696336, 0, 15488, 272696336, 0, 15488, 272696336, 0, 15488, 272696336, 0, 15488, 272696336, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 
15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0, 15808, 3067833782, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374762732296037_273_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374762732296037_273_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..576bda33 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374762732296037_273_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374783986641186_275_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374783986641186_275_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2a2619ea --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374783986641186_275_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,374 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((WaveGetLaneIndex() == 23)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveSum(4)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 29)) { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (290 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((313 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((324 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (329 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (336 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single 
dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 354 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 3328, 4, 0, 5184, 263168, 0, 5184, 263168, 0, 5632, 559240, 0, 5632, 559240, 0, 5632, 559240, 0, 5632, 559240, 0, 5632, 559240, 0, 7552, 8388608, 0, 7296, 16777216, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 6784, 1414791168, 0, 6784, 1414791168, 0, 6784, 1414791168, 0, 6784, 1414791168, 0, 6784, 1414791168, 0, 6784, 1414791168, 0, 8896, 17, 0, 8896, 17, 0, 10816, 17, 0, 10816, 17, 0, 14144, 16842752, 0, 14144, 16842752, 0, 14160, 16842752, 0, 14160, 16842752, 0, 15488, 269484032, 0, 15488, 269484032, 0, 15504, 269484032, 0, 15504, 269484032, 0, 16640, 268435456, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21504, 559240, 0, 21504, 559240, 0, 21504, 559240, 0, 21504, 559240, 0, 21504, 559240, 0, 576, 17, 0, 576, 17, 0, 3328, 4, 0, 5184, 263168, 0, 5184, 263168, 0, 5632, 559240, 0, 5632, 559240, 0, 5632, 559240, 0, 5632, 559240, 0, 5632, 559240, 0, 7552, 8388608, 0, 7296, 16777216, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 7040, 2854922922, 0, 6784, 1414791168, 0, 6784, 1414791168, 0, 6784, 1414791168, 0, 6784, 1414791168, 0, 6784, 1414791168, 0, 6784, 1414791168, 0, 8896, 17, 0, 8896, 17, 0, 10816, 17, 0, 10816, 
17, 0, 14144, 16842752, 0, 14144, 16842752, 0, 14160, 16842752, 0, 14160, 16842752, 0, 15488, 269484032, 0, 15488, 269484032, 0, 15504, 269484032, 0, 15504, 269484032, 0, 16640, 268435456, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21056, 1145324612, 0, 21504, 559240, 0, 21504, 559240, 0, 21504, 559240, 0, 21504, 559240, 0, 21504, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756374792940071985_276_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756374792940071985_276_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8863c77b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756374792940071985_276_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,302 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 28))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((73 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((88 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((99 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 28))) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((162 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 23)) { + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((172 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((181 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((190 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 1))) { + if ((((((WaveGetLaneIndex() == 6) || 
(WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((258 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((275 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((286 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((296 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1116 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3280, 1430258005, 0, 3280, 1430258005, 0, 3280, 1430258005, 0, 3280, 1430258005, 0, 3280, 1430258005, 0, 3280, 1430258005, 0, 3280, 1430258005, 0, 3280, 1430258005, 0, 3280, 1430258005, 0, 3280, 1430258005, 0, 3284, 1430258005, 0, 3284, 1430258005, 0, 3284, 1430258005, 0, 3284, 1430258005, 0, 3284, 1430258005, 0, 3284, 1430258005, 0, 3284, 1430258005, 0, 3284, 1430258005, 0, 3284, 1430258005, 0, 3284, 1430258005, 0, 3296, 1430258005, 0, 3296, 1430258005, 0, 3296, 1430258005, 0, 3296, 1430258005, 0, 3296, 1430258005, 0, 3296, 1430258005, 0, 3296, 1430258005, 0, 3296, 1430258005, 0, 3296, 1430258005, 0, 3296, 1430258005, 0, 3300, 1430258005, 0, 3300, 1430258005, 0, 3300, 1430258005, 0, 3300, 1430258005, 0, 3300, 1430258005, 0, 3300, 1430258005, 0, 3300, 1430258005, 0, 3300, 1430258005, 0, 3300, 1430258005, 0, 3300, 1430258005, 0, 3312, 1430258005, 0, 3312, 1430258005, 0, 3312, 1430258005, 0, 3312, 1430258005, 0, 3312, 1430258005, 0, 3312, 1430258005, 0, 3312, 1430258005, 0, 3312, 1430258005, 0, 3312, 1430258005, 0, 3312, 1430258005, 0, 3316, 1430258005, 0, 3316, 1430258005, 0, 3316, 1430258005, 0, 3316, 1430258005, 0, 3316, 1430258005, 0, 3316, 1430258005, 0, 3316, 1430258005, 0, 3316, 1430258005, 0, 3316, 1430258005, 0, 3316, 1430258005, 0, 6352, 85, 0, 6352, 85, 0, 6352, 85, 0, 6352, 85, 0, 6356, 85, 0, 6356, 85, 0, 6356, 85, 0, 6356, 85, 0, 6368, 85, 0, 6368, 85, 0, 6368, 85, 0, 6368, 85, 0, 6372, 85, 0, 
6372, 85, 0, 6372, 85, 0, 6372, 85, 0, 6384, 85, 0, 6384, 85, 0, 6384, 85, 0, 6384, 85, 0, 6388, 85, 0, 6388, 85, 0, 6388, 85, 0, 6388, 85, 0, 7568, 4096, 0, 7584, 4096, 0, 7600, 4096, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 9232, 1227096064, 0, 9232, 1227096064, 0, 9232, 1227096064, 0, 9232, 1227096064, 0, 9232, 1227096064, 0, 9248, 1227096064, 0, 9248, 1227096064, 0, 9248, 1227096064, 0, 9248, 1227096064, 0, 9248, 1227096064, 0, 10388, 1073741825, 0, 10388, 1073741825, 0, 10392, 1073741825, 0, 10392, 1073741825, 0, 10404, 1073741825, 0, 10404, 1073741825, 0, 10408, 1073741825, 0, 10408, 1073741825, 0, 11604, 2130440, 0, 11604, 2130440, 0, 11604, 2130440, 0, 11604, 2130440, 0, 11608, 2130440, 0, 11608, 2130440, 0, 11608, 2130440, 0, 11608, 2130440, 0, 11620, 2130440, 0, 11620, 2130440, 0, 11620, 2130440, 0, 11620, 2130440, 0, 11624, 2130440, 0, 11624, 2130440, 0, 11624, 2130440, 0, 11624, 2130440, 0, 12180, 2130440, 0, 12180, 2130440, 0, 12180, 2130440, 0, 12180, 2130440, 0, 12184, 2130440, 0, 12184, 2130440, 0, 12184, 2130440, 0, 12184, 2130440, 0, 12196, 2130440, 0, 12196, 2130440, 0, 12196, 2130440, 0, 12196, 2130440, 0, 12200, 2130440, 0, 12200, 2130440, 0, 12200, 2130440, 0, 12200, 2130440, 0, 12736, 272696336, 0, 12736, 272696336, 0, 12736, 272696336, 0, 12736, 272696336, 0, 12736, 272696336, 0, 17600, 545261568, 0, 17600, 545261568, 0, 17600, 545261568, 0, 17604, 545261568, 0, 17604, 545261568, 0, 17604, 545261568, 0, 17616, 545261568, 0, 17616, 545261568, 0, 17616, 545261568, 0, 17620, 545261568, 0, 17620, 545261568, 0, 17620, 545261568, 0, 18304, 545261568, 0, 18304, 545261568, 0, 18304, 545261568, 0, 18308, 545261568, 0, 18308, 545261568, 0, 18308, 545261568, 
0, 18320, 545261568, 0, 18320, 545261568, 0, 18320, 545261568, 0, 18324, 545261568, 0, 18324, 545261568, 0, 18324, 545261568, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3280, 1430258005, 0, 3280, 1430258005, 0, 3280, 1430258005, 0, 3280, 1430258005, 0, 3280, 1430258005, 0, 3280, 1430258005, 0, 3280, 1430258005, 0, 3280, 1430258005, 0, 3280, 1430258005, 0, 3280, 1430258005, 0, 3284, 1430258005, 0, 3284, 1430258005, 0, 3284, 1430258005, 0, 3284, 1430258005, 0, 3284, 1430258005, 0, 3284, 1430258005, 0, 3284, 1430258005, 0, 3284, 1430258005, 0, 3284, 1430258005, 0, 3284, 1430258005, 0, 3296, 1430258005, 0, 3296, 1430258005, 0, 3296, 1430258005, 0, 3296, 1430258005, 0, 3296, 1430258005, 0, 3296, 1430258005, 0, 3296, 1430258005, 0, 3296, 1430258005, 0, 3296, 1430258005, 0, 3296, 1430258005, 0, 3300, 1430258005, 0, 3300, 1430258005, 0, 3300, 1430258005, 0, 3300, 1430258005, 0, 3300, 1430258005, 0, 3300, 1430258005, 0, 3300, 1430258005, 0, 3300, 1430258005, 0, 3300, 1430258005, 0, 3300, 1430258005, 0, 3312, 1430258005, 0, 3312, 1430258005, 0, 3312, 1430258005, 0, 3312, 1430258005, 0, 3312, 1430258005, 0, 3312, 1430258005, 0, 3312, 1430258005, 0, 3312, 1430258005, 0, 3312, 1430258005, 0, 3312, 1430258005, 0, 3316, 1430258005, 0, 3316, 1430258005, 0, 3316, 1430258005, 0, 3316, 1430258005, 0, 3316, 1430258005, 0, 3316, 1430258005, 0, 3316, 1430258005, 0, 3316, 1430258005, 0, 3316, 1430258005, 0, 3316, 1430258005, 0, 6352, 85, 0, 6352, 85, 0, 6352, 85, 0, 6352, 85, 0, 6356, 85, 0, 6356, 85, 0, 6356, 85, 0, 6356, 85, 0, 6368, 85, 0, 6368, 85, 0, 6368, 85, 0, 6368, 85, 0, 6372, 85, 0, 6372, 85, 0, 6372, 85, 0, 6372, 85, 0, 6384, 85, 0, 6384, 85, 0, 6384, 85, 0, 6384, 85, 0, 6388, 85, 0, 6388, 85, 0, 6388, 85, 0, 6388, 85, 0, 7568, 4096, 0, 7584, 4096, 0, 7600, 4096, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 
1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 8128, 1431655765, 0, 9232, 1227096064, 0, 9232, 1227096064, 0, 9232, 1227096064, 0, 9232, 1227096064, 0, 9232, 1227096064, 0, 9248, 1227096064, 0, 9248, 1227096064, 0, 9248, 1227096064, 0, 9248, 1227096064, 0, 9248, 1227096064, 0, 10388, 1073741825, 0, 10388, 1073741825, 0, 10392, 1073741825, 0, 10392, 1073741825, 0, 10404, 1073741825, 0, 10404, 1073741825, 0, 10408, 1073741825, 0, 10408, 1073741825, 0, 11604, 2130440, 0, 11604, 2130440, 0, 11604, 2130440, 0, 11604, 2130440, 0, 11608, 2130440, 0, 11608, 2130440, 0, 11608, 2130440, 0, 11608, 2130440, 0, 11620, 2130440, 0, 11620, 2130440, 0, 11620, 2130440, 0, 11620, 2130440, 0, 11624, 2130440, 0, 11624, 2130440, 0, 11624, 2130440, 0, 11624, 2130440, 0, 12180, 2130440, 0, 12180, 2130440, 0, 12180, 2130440, 0, 12180, 2130440, 0, 12184, 2130440, 0, 12184, 2130440, 0, 12184, 2130440, 0, 12184, 2130440, 0, 12196, 2130440, 0, 12196, 2130440, 0, 12196, 2130440, 0, 12196, 2130440, 0, 12200, 2130440, 0, 12200, 2130440, 0, 12200, 2130440, 0, 12200, 2130440, 0, 12736, 272696336, 0, 12736, 272696336, 0, 12736, 272696336, 0, 12736, 272696336, 0, 12736, 272696336, 0, 17600, 545261568, 0, 17600, 545261568, 0, 17600, 545261568, 0, 17604, 545261568, 0, 17604, 545261568, 0, 17604, 545261568, 0, 17616, 545261568, 0, 17616, 545261568, 0, 17616, 545261568, 0, 17620, 545261568, 0, 17620, 545261568, 0, 17620, 545261568, 0, 18304, 545261568, 0, 18304, 545261568, 0, 18304, 545261568, 0, 18308, 545261568, 0, 18308, 545261568, 0, 18308, 545261568, 0, 18320, 545261568, 0, 18320, 545261568, 0, 18320, 545261568, 0, 18324, 545261568, 0, 18324, 545261568, 0, 18324, 545261568, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - 
Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756375171377702098_278_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756375171377702098_278_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..69fff037 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756375171377702098_278_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,453 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | 
(counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 26)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (303 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 
28)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (315 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (324 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (329 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((352 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((370 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((386 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((397 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter5 == 2)) { + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((419 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 552 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3264, 1048832, 0, 3264, 1048832, 0, 5456, 1073741824, 0, 5472, 1073741824, 0, 5488, 1073741824, 0, 6160, 1073741824, 0, 6176, 1073741824, 0, 6192, 1073741824, 0, 6592, 838860, 0, 6592, 838860, 0, 6592, 838860, 0, 6592, 838860, 0, 6592, 838860, 0, 6592, 
838860, 0, 6592, 838860, 0, 6592, 838860, 0, 6592, 838860, 0, 6592, 838860, 0, 8976, 16777217, 0, 8976, 16777217, 0, 8992, 16777217, 0, 8992, 16777217, 0, 9008, 16777217, 0, 9008, 16777217, 0, 10128, 1073741825, 0, 10128, 1073741825, 0, 10144, 1073741825, 0, 10144, 1073741825, 0, 10160, 1073741825, 0, 10160, 1073741825, 0, 10960, 1073741833, 0, 10960, 1073741833, 0, 10960, 1073741833, 0, 10976, 1073741833, 0, 10976, 1073741833, 0, 10976, 1073741833, 0, 10992, 1073741833, 0, 10992, 1073741833, 0, 10992, 1073741833, 0, 12432, 520, 0, 12432, 520, 0, 12448, 520, 0, 12448, 520, 0, 12464, 520, 0, 12464, 520, 0, 12992, 272696336, 0, 12992, 272696336, 0, 12992, 272696336, 0, 12992, 272696336, 0, 12992, 272696336, 0, 13312, 613566756, 0, 13312, 613566756, 0, 13312, 613566756, 0, 13312, 613566756, 0, 13312, 613566756, 0, 13312, 613566756, 0, 13312, 613566756, 0, 13312, 613566756, 0, 13312, 613566756, 0, 13312, 613566756, 0, 13952, 17, 0, 13952, 17, 0, 18752, 64, 0, 20736, 4195328, 0, 20736, 4195328, 0, 21056, 67125252, 0, 21056, 67125252, 0, 21056, 67125252, 0, 22528, 134217728, 0, 22544, 134217728, 0, 22560, 134217728, 0, 23684, 2290089984, 0, 23684, 2290089984, 0, 23684, 2290089984, 0, 23688, 2290089984, 0, 23688, 2290089984, 0, 23688, 2290089984, 0, 23700, 2290089984, 0, 23700, 2290089984, 0, 23700, 2290089984, 0, 23704, 2290089984, 0, 23704, 2290089984, 0, 23704, 2290089984, 0, 23716, 2290089984, 0, 23716, 2290089984, 0, 23716, 2290089984, 0, 23720, 2290089984, 0, 23720, 2290089984, 0, 23720, 2290089984, 0, 26816, 524288, 0, 26832, 524288, 0, 26848, 524288, 0, 3264, 1048832, 0, 3264, 1048832, 0, 5456, 1073741824, 0, 5472, 1073741824, 0, 5488, 1073741824, 0, 6160, 1073741824, 0, 6176, 1073741824, 0, 6192, 1073741824, 0, 6592, 838860, 0, 6592, 838860, 0, 6592, 838860, 0, 6592, 838860, 0, 6592, 838860, 0, 6592, 838860, 0, 6592, 838860, 0, 6592, 838860, 0, 6592, 838860, 0, 6592, 838860, 0, 8976, 16777217, 0, 8976, 16777217, 0, 8992, 16777217, 0, 8992, 16777217, 0, 9008, 
16777217, 0, 9008, 16777217, 0, 10128, 1073741825, 0, 10128, 1073741825, 0, 10144, 1073741825, 0, 10144, 1073741825, 0, 10160, 1073741825, 0, 10160, 1073741825, 0, 10960, 1073741833, 0, 10960, 1073741833, 0, 10960, 1073741833, 0, 10976, 1073741833, 0, 10976, 1073741833, 0, 10976, 1073741833, 0, 10992, 1073741833, 0, 10992, 1073741833, 0, 10992, 1073741833, 0, 12432, 520, 0, 12432, 520, 0, 12448, 520, 0, 12448, 520, 0, 12464, 520, 0, 12464, 520, 0, 12992, 272696336, 0, 12992, 272696336, 0, 12992, 272696336, 0, 12992, 272696336, 0, 12992, 272696336, 0, 13312, 613566756, 0, 13312, 613566756, 0, 13312, 613566756, 0, 13312, 613566756, 0, 13312, 613566756, 0, 13312, 613566756, 0, 13312, 613566756, 0, 13312, 613566756, 0, 13312, 613566756, 0, 13312, 613566756, 0, 13952, 17, 0, 13952, 17, 0, 18752, 64, 0, 20736, 4195328, 0, 20736, 4195328, 0, 21056, 67125252, 0, 21056, 67125252, 0, 21056, 67125252, 0, 22528, 134217728, 0, 22544, 134217728, 0, 22560, 134217728, 0, 23684, 2290089984, 0, 23684, 2290089984, 0, 23684, 2290089984, 0, 23688, 2290089984, 0, 23688, 2290089984, 0, 23688, 2290089984, 0, 23700, 2290089984, 0, 23700, 2290089984, 0, 23700, 2290089984, 0, 23704, 2290089984, 0, 23704, 2290089984, 0, 23704, 2290089984, 0, 23716, 2290089984, 0, 23716, 2290089984, 0, 23716, 2290089984, 0, 23720, 2290089984, 0, 23720, 2290089984, 0, 23720, 2290089984, 0, 26816, 524288, 0, 26832, 524288, 0, 26848, 524288, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756375297820270124_280_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756375297820270124_280_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..985d10dd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756375297820270124_280_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,187 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 31))) { + result 
= (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 276 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 2816, 17, 0, 2816, 17, 0, 5504, 1073741828, 0, 5504, 1073741828, 0, 5520, 1073741828, 0, 5520, 1073741828, 0, 6336, 4, 0, 6352, 4, 0, 6784, 559240, 0, 6784, 559240, 0, 6784, 559240, 0, 6784, 559240, 0, 6784, 559240, 0, 576, 17, 0, 576, 17, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 
1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1472, 1717986918, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 1920, 978670, 0, 2816, 17, 0, 2816, 17, 0, 5504, 1073741828, 0, 5504, 1073741828, 0, 5520, 1073741828, 0, 5520, 1073741828, 0, 6336, 4, 0, 6352, 4, 0, 6784, 559240, 0, 6784, 559240, 0, 6784, 559240, 0, 6784, 559240, 0, 6784, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756375805449209187_286_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756375805449209187_286_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7a4276d4 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756375805449209187_286_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,339 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 27))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() 
== 12)) || (WaveGetLaneIndex() == 24))) { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 26))) { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 2))) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (307 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24))) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (337 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((356 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((367 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (374 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (384 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (393 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 7680, 68174084, 0, 7680, 68174084, 0, 7680, 68174084, 0, 7680, 68174084, 0, 7680, 68174084, 0, 7696, 68174084, 0, 7696, 68174084, 0, 7696, 68174084, 0, 7696, 68174084, 0, 7696, 68174084, 0, 7712, 68174084, 0, 7712, 68174084, 0, 7712, 68174084, 0, 7712, 68174084, 0, 7712, 68174084, 0, 24576, 85, 0, 24576, 85, 0, 24576, 85, 0, 24576, 85, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 7680, 68174084, 0, 7680, 68174084, 0, 7680, 68174084, 0, 7680, 68174084, 0, 7680, 68174084, 0, 7696, 68174084, 0, 7696, 68174084, 0, 7696, 68174084, 0, 7696, 68174084, 0, 7696, 68174084, 0, 7712, 68174084, 0, 7712, 68174084, 0, 7712, 68174084, 0, 7712, 68174084, 0, 7712, 68174084, 0, 24576, 85, 0, 24576, 85, 0, 24576, 85, 0, 24576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756375860954692177_288_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756375860954692177_288_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87e3db42 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756375860954692177_288_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,314 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 27))) { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + 
WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 342 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 2112, 17, 0, 2112, 17, 0, 8464, 537002016, 0, 8464, 537002016, 0, 8464, 537002016, 0, 8480, 537002016, 0, 8480, 537002016, 0, 8480, 537002016, 0, 8496, 537002016, 0, 8496, 537002016, 0, 8496, 537002016, 0, 9616, 572522496, 0, 9616, 572522496, 0, 9616, 572522496, 0, 9632, 572522496, 0, 9632, 572522496, 0, 9632, 572522496, 0, 9648, 572522496, 0, 9648, 572522496, 0, 9648, 572522496, 0, 10432, 64, 0, 12416, 1078199360, 0, 12416, 1078199360, 0, 12416, 1078199360, 0, 12416, 1078199360, 0, 12416, 1078199360, 0, 12736, 1145324612, 0, 12736, 1145324612, 0, 12736, 1145324612, 0, 12736, 1145324612, 0, 12736, 1145324612, 0, 12736, 1145324612, 0, 12736, 1145324612, 0, 12736, 1145324612, 0, 13184, 559240, 0, 13184, 559240, 0, 13184, 559240, 0, 13184, 559240, 0, 13184, 559240, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 
1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 2112, 17, 0, 2112, 17, 0, 8464, 537002016, 0, 8464, 537002016, 0, 8464, 537002016, 0, 8480, 537002016, 0, 8480, 537002016, 0, 8480, 537002016, 0, 8496, 537002016, 0, 8496, 537002016, 0, 8496, 537002016, 0, 9616, 572522496, 0, 9616, 572522496, 0, 9616, 572522496, 0, 9632, 572522496, 0, 9632, 572522496, 0, 9632, 572522496, 0, 9648, 572522496, 0, 9648, 572522496, 0, 9648, 572522496, 0, 10432, 64, 0, 12416, 1078199360, 0, 12416, 1078199360, 0, 12416, 1078199360, 0, 12416, 1078199360, 0, 12416, 1078199360, 0, 12736, 1145324612, 0, 12736, 1145324612, 0, 12736, 1145324612, 0, 12736, 1145324612, 0, 12736, 1145324612, 0, 12736, 1145324612, 0, 12736, 1145324612, 0, 12736, 1145324612, 0, 13184, 559240, 0, 13184, 559240, 0, 13184, 559240, 0, 13184, 559240, 0, 13184, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756375870369060481_289_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756375870369060481_289_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b2a2a453 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756375870369060481_289_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,130 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 3344, 128, 0, 3360, 128, 0, 576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 3344, 128, 0, 3360, 128, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756375871576634224_291_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756375871576634224_291_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e1369da8 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756375871576634224_291_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,251 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 20)) { + if 
(((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 26))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 31)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 22)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 3: { + uint counter2 = 
0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 23))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 546 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 
613566756, 0, 5568, 285212945, 0, 5568, 285212945, 0, 5568, 285212945, 0, 5568, 285212945, 0, 5568, 285212945, 0, 5584, 285212945, 0, 5584, 285212945, 0, 5584, 285212945, 0, 5584, 285212945, 0, 5584, 285212945, 0, 5600, 285212945, 0, 5600, 285212945, 0, 5600, 285212945, 0, 5600, 285212945, 0, 5600, 285212945, 0, 6912, 17, 0, 6912, 17, 0, 6928, 17, 0, 6928, 17, 0, 6944, 17, 0, 6944, 17, 0, 7872, 286331153, 0, 7872, 286331153, 0, 7872, 286331153, 0, 7872, 286331153, 0, 7872, 286331153, 0, 7872, 286331153, 0, 7872, 286331153, 0, 7872, 286331153, 0, 8960, 572662306, 0, 8960, 572662306, 0, 8960, 572662306, 0, 8960, 572662306, 0, 8960, 572662306, 0, 8960, 572662306, 0, 8960, 572662306, 0, 8960, 572662306, 0, 8976, 572662306, 0, 8976, 572662306, 0, 8976, 572662306, 0, 8976, 572662306, 0, 8976, 572662306, 0, 8976, 572662306, 0, 8976, 572662306, 0, 8976, 572662306, 0, 10048, 1996488704, 0, 10048, 1996488704, 0, 10048, 1996488704, 0, 10048, 1996488704, 0, 10048, 1996488704, 0, 10048, 1996488704, 0, 10064, 1996488704, 0, 10064, 1996488704, 0, 10064, 1996488704, 0, 10064, 1996488704, 0, 10064, 1996488704, 0, 10064, 1996488704, 0, 11520, 1048609, 0, 11520, 1048609, 0, 11520, 1048609, 0, 11536, 1048609, 0, 11536, 1048609, 0, 11536, 1048609, 0, 12688, 69632, 0, 12688, 69632, 0, 12704, 69632, 0, 12704, 69632, 0, 15312, 8392768, 0, 15312, 8392768, 0, 15312, 8392768, 0, 15328, 8392768, 0, 15328, 8392768, 0, 15328, 8392768, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 5568, 285212945, 0, 5568, 285212945, 0, 5568, 285212945, 0, 5568, 285212945, 0, 5568, 285212945, 0, 5584, 285212945, 0, 5584, 285212945, 0, 5584, 285212945, 0, 5584, 285212945, 0, 5584, 285212945, 0, 5600, 285212945, 0, 5600, 
285212945, 0, 5600, 285212945, 0, 5600, 285212945, 0, 5600, 285212945, 0, 6912, 17, 0, 6912, 17, 0, 6928, 17, 0, 6928, 17, 0, 6944, 17, 0, 6944, 17, 0, 7872, 286331153, 0, 7872, 286331153, 0, 7872, 286331153, 0, 7872, 286331153, 0, 7872, 286331153, 0, 7872, 286331153, 0, 7872, 286331153, 0, 7872, 286331153, 0, 8960, 572662306, 0, 8960, 572662306, 0, 8960, 572662306, 0, 8960, 572662306, 0, 8960, 572662306, 0, 8960, 572662306, 0, 8960, 572662306, 0, 8960, 572662306, 0, 8976, 572662306, 0, 8976, 572662306, 0, 8976, 572662306, 0, 8976, 572662306, 0, 8976, 572662306, 0, 8976, 572662306, 0, 8976, 572662306, 0, 8976, 572662306, 0, 10048, 1996488704, 0, 10048, 1996488704, 0, 10048, 1996488704, 0, 10048, 1996488704, 0, 10048, 1996488704, 0, 10048, 1996488704, 0, 10064, 1996488704, 0, 10064, 1996488704, 0, 10064, 1996488704, 0, 10064, 1996488704, 0, 10064, 1996488704, 0, 10064, 1996488704, 0, 11520, 1048609, 0, 11520, 1048609, 0, 11520, 1048609, 0, 11536, 1048609, 0, 11536, 1048609, 0, 11536, 1048609, 0, 12688, 69632, 0, 12688, 69632, 0, 12704, 69632, 0, 12704, 69632, 0, 15312, 8392768, 0, 15312, 8392768, 0, 15312, 8392768, 0, 15328, 8392768, 0, 15328, 8392768, 0, 15328, 8392768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756376145504047543_293_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756376145504047543_293_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dd6ac34d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756376145504047543_293_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 11)) { + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 2047, 0, 576, 2047, 0, 576, 2047, 0, 576, 2047, 0, 576, 2047, 0, 576, 2047, 0, 576, 2047, 0, 576, 2047, 0, 576, 2047, 0, 576, 2047, 0, 576, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1728, 4026531840, 0, 1728, 4026531840, 0, 1728, 4026531840, 0, 1728, 4026531840, 0, 2432, 4286578688, 0, 2432, 4286578688, 0, 2432, 4286578688, 0, 2432, 4286578688, 0, 2432, 4286578688, 0, 2432, 4286578688, 0, 2432, 4286578688, 0, 2432, 4286578688, 0, 2432, 4286578688, 0, 576, 2047, 0, 576, 2047, 0, 576, 2047, 0, 576, 2047, 0, 576, 2047, 0, 576, 2047, 0, 576, 2047, 0, 576, 2047, 0, 576, 2047, 0, 576, 2047, 0, 576, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1024, 2047, 0, 1728, 4026531840, 0, 1728, 4026531840, 0, 1728, 4026531840, 0, 1728, 4026531840, 0, 2432, 4286578688, 0, 2432, 4286578688, 0, 2432, 4286578688, 0, 2432, 4286578688, 0, 2432, 4286578688, 0, 2432, 4286578688, 0, 2432, 4286578688, 0, 2432, 4286578688, 0, 2432, 4286578688, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756376145775501626_294_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756376145775501626_294_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2413d53d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756376145775501626_294_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,525 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 19)) { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 6)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 22))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 14) || 
(WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((244 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 27))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (290 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) 
== 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (303 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (321 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (341 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (361 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((382 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 20)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (394 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (399 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (403 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (414 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 5) 
|| (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (425 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (444 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (454 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 22))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((479 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((486 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 18)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((493 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((513 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (530 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 21)) { + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (540 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (547 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (551 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 402 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1472, 1090519105, 0, 1472, 1090519105, 0, 1472, 1090519105, 0, 1472, 1090519105, 0, 2240, 1090519040, 0, 2240, 1090519040, 0, 3392, 1090519041, 0, 3392, 1090519041, 0, 3392, 1090519041, 0, 11648, 68174084, 0, 11648, 68174084, 0, 11648, 68174084, 0, 11648, 68174084, 0, 11648, 68174084, 0, 20096, 2818572290, 0, 20096, 2818572290, 0, 20096, 2818572290, 0, 20096, 2818572290, 0, 20544, 33554432, 0, 21824, 3221225479, 0, 21824, 3221225479, 0, 21824, 3221225479, 0, 21824, 3221225479, 0, 21824, 3221225479, 0, 23104, 3758096447, 0, 23104, 3758096447, 0, 23104, 3758096447, 0, 23104, 3758096447, 0, 23104, 3758096447, 0, 23104, 3758096447, 0, 23104, 3758096447, 0, 23104, 3758096447, 0, 23104, 3758096447, 0, 24464, 1073741889, 0, 24464, 1073741889, 0, 24464, 1073741889, 0, 24480, 1073741889, 0, 24480, 1073741889, 0, 24480, 1073741889, 0, 24496, 1073741889, 0, 24496, 1073741889, 0, 24496, 1073741889, 0, 25536, 536870948, 0, 25536, 536870948, 0, 25536, 536870948, 0, 26496, 3758096387, 0, 26496, 3758096387, 0, 26496, 3758096387, 0, 26496, 3758096387, 0, 26496, 3758096387, 0, 27200, 134217728, 0, 28416, 269484288, 0, 28416, 269484288, 0, 28416, 269484288, 0, 29056, 73, 0, 29056, 73, 0, 29056, 73, 0, 30672, 2147483650, 0, 30672, 2147483650, 0, 30688, 2147483650, 0, 30688, 2147483650, 0, 31568, 524288, 0, 31584, 524288, 0, 34560, 536870912, 0, 35008, 612368384, 0, 35008, 612368384, 0, 35008, 612368384, 0, 1472, 1090519105, 0, 1472, 1090519105, 0, 1472, 1090519105, 0, 1472, 1090519105, 0, 2240, 1090519040, 0, 2240, 
1090519040, 0, 3392, 1090519041, 0, 3392, 1090519041, 0, 3392, 1090519041, 0, 11648, 68174084, 0, 11648, 68174084, 0, 11648, 68174084, 0, 11648, 68174084, 0, 11648, 68174084, 0, 20096, 2818572290, 0, 20096, 2818572290, 0, 20096, 2818572290, 0, 20096, 2818572290, 0, 20544, 33554432, 0, 21824, 3221225479, 0, 21824, 3221225479, 0, 21824, 3221225479, 0, 21824, 3221225479, 0, 21824, 3221225479, 0, 23104, 3758096447, 0, 23104, 3758096447, 0, 23104, 3758096447, 0, 23104, 3758096447, 0, 23104, 3758096447, 0, 23104, 3758096447, 0, 23104, 3758096447, 0, 23104, 3758096447, 0, 23104, 3758096447, 0, 24464, 1073741889, 0, 24464, 1073741889, 0, 24464, 1073741889, 0, 24480, 1073741889, 0, 24480, 1073741889, 0, 24480, 1073741889, 0, 24496, 1073741889, 0, 24496, 1073741889, 0, 24496, 1073741889, 0, 25536, 536870948, 0, 25536, 536870948, 0, 25536, 536870948, 0, 26496, 3758096387, 0, 26496, 3758096387, 0, 26496, 3758096387, 0, 26496, 3758096387, 0, 26496, 3758096387, 0, 27200, 134217728, 0, 28416, 269484288, 0, 28416, 269484288, 0, 28416, 269484288, 0, 29056, 73, 0, 29056, 73, 0, 29056, 73, 0, 30672, 2147483650, 0, 30672, 2147483650, 0, 30688, 2147483650, 0, 30688, 2147483650, 0, 31568, 524288, 0, 31584, 524288, 0, 34560, 536870912, 0, 35008, 612368384, 0, 35008, 612368384, 0, 35008, 612368384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756376188911373460_295_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756376188911373460_295_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..be27bde2 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756376188911373460_295_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,173 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 18)) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 23))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 15)) { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((WaveGetLaneIndex() < 15)) { + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 67108864, 0, 1312, 67108864, 0, 5520, 537919488, 0, 5520, 537919488, 0, 5536, 537919488, 0, 5536, 537919488, 0, 10432, 33820680, 0, 10432, 33820680, 0, 10432, 33820680, 0, 10432, 33820680, 0, 10048, 67108864, 0, 9792, 7, 0, 9792, 7, 0, 9792, 7, 0, 9152, 5374080, 0, 9152, 5374080, 0, 9152, 5374080, 0, 9152, 5374080, 0, 1296, 67108864, 0, 1312, 67108864, 0, 5520, 537919488, 0, 5520, 537919488, 0, 5536, 537919488, 0, 5536, 537919488, 0, 10432, 33820680, 0, 10432, 33820680, 0, 10432, 33820680, 0, 10432, 33820680, 0, 10048, 67108864, 0, 9792, 7, 0, 9792, 7, 0, 9792, 7, 0, 9152, 5374080, 
0, 9152, 5374080, 0, 9152, 5374080, 0, 9152, 5374080, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756376199599679136_297_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756376199599679136_297_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5cb75abb --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756376199599679136_297_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,92 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 22)) { + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756376199741045095_298_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756376199741045095_298_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cf6b263a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756376199741045095_298_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,256 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 11)) { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() 
== 18)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((184 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((200 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 492 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2496, 1, 0, 3584, 273, 0, 3584, 273, 0, 3584, 273, 0, 3600, 273, 0, 3600, 273, 0, 3600, 273, 0, 3616, 273, 0, 3616, 273, 0, 3616, 273, 0, 4992, 273, 0, 4992, 273, 0, 4992, 273, 0, 7440, 2, 0, 7456, 2, 0, 8656, 512, 0, 8672, 512, 0, 11792, 1140850756, 0, 11792, 1140850756, 0, 11792, 1140850756, 0, 11792, 1140850756, 0, 11808, 1140850756, 0, 11808, 1140850756, 0, 11808, 1140850756, 0, 11808, 1140850756, 0, 12820, 1145324612, 0, 12820, 1145324612, 0, 12820, 1145324612, 0, 12820, 1145324612, 0, 12820, 1145324612, 0, 12820, 1145324612, 0, 12820, 1145324612, 0, 12820, 1145324612, 0, 12824, 1145324612, 0, 12824, 1145324612, 0, 12824, 1145324612, 0, 12824, 1145324612, 0, 
12824, 1145324612, 0, 12824, 1145324612, 0, 12824, 1145324612, 0, 12824, 1145324612, 0, 12828, 1145324612, 0, 12828, 1145324612, 0, 12828, 1145324612, 0, 12828, 1145324612, 0, 12828, 1145324612, 0, 12828, 1145324612, 0, 12828, 1145324612, 0, 12828, 1145324612, 0, 12836, 1145324612, 0, 12836, 1145324612, 0, 12836, 1145324612, 0, 12836, 1145324612, 0, 12836, 1145324612, 0, 12836, 1145324612, 0, 12836, 1145324612, 0, 12836, 1145324612, 0, 12840, 1145324612, 0, 12840, 1145324612, 0, 12840, 1145324612, 0, 12840, 1145324612, 0, 12840, 1145324612, 0, 12840, 1145324612, 0, 12840, 1145324612, 0, 12840, 1145324612, 0, 12844, 1145324612, 0, 12844, 1145324612, 0, 12844, 1145324612, 0, 12844, 1145324612, 0, 12844, 1145324612, 0, 12844, 1145324612, 0, 12844, 1145324612, 0, 12844, 1145324612, 0, 13248, 559240, 0, 13248, 559240, 0, 13248, 559240, 0, 13248, 559240, 0, 13248, 559240, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2496, 1, 0, 3584, 273, 0, 3584, 273, 0, 3584, 273, 0, 3600, 273, 0, 3600, 273, 0, 3600, 273, 0, 3616, 273, 0, 3616, 273, 0, 3616, 273, 0, 4992, 273, 0, 4992, 273, 0, 4992, 273, 0, 7440, 2, 0, 7456, 2, 0, 8656, 512, 0, 8672, 512, 0, 11792, 1140850756, 0, 11792, 1140850756, 0, 11792, 1140850756, 0, 11792, 1140850756, 0, 11808, 1140850756, 0, 11808, 1140850756, 0, 11808, 1140850756, 0, 11808, 1140850756, 0, 12820, 1145324612, 0, 12820, 1145324612, 0, 12820, 1145324612, 0, 12820, 1145324612, 0, 12820, 1145324612, 0, 12820, 1145324612, 0, 12820, 1145324612, 0, 12820, 1145324612, 0, 12824, 1145324612, 0, 12824, 1145324612, 0, 12824, 1145324612, 0, 12824, 1145324612, 0, 12824, 1145324612, 0, 12824, 1145324612, 0, 12824, 1145324612, 0, 12824, 1145324612, 0, 12828, 1145324612, 0, 12828, 1145324612, 0, 12828, 1145324612, 0, 12828, 1145324612, 0, 12828, 1145324612, 0, 12828, 1145324612, 0, 12828, 1145324612, 0, 12828, 1145324612, 0, 12836, 1145324612, 0, 12836, 1145324612, 0, 12836, 1145324612, 0, 12836, 1145324612, 0, 12836, 1145324612, 0, 12836, 1145324612, 0, 
12836, 1145324612, 0, 12836, 1145324612, 0, 12840, 1145324612, 0, 12840, 1145324612, 0, 12840, 1145324612, 0, 12840, 1145324612, 0, 12840, 1145324612, 0, 12840, 1145324612, 0, 12840, 1145324612, 0, 12840, 1145324612, 0, 12844, 1145324612, 0, 12844, 1145324612, 0, 12844, 1145324612, 0, 12844, 1145324612, 0, 12844, 1145324612, 0, 12844, 1145324612, 0, 12844, 1145324612, 0, 12844, 1145324612, 0, 13248, 559240, 0, 13248, 559240, 0, 13248, 559240, 0, 13248, 559240, 0, 13248, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756376204290168272_299_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756376204290168272_299_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3651414a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756376204290168272_299_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,267 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
case 2: { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 30))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 20))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 26))) { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if 
((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((191 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((counter2 == 1)) { + break; + } + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((222 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((234 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((241 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((248 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((counter4 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 426 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1232, 1, 0, 2772, 2097152, 0, 3988, 4097, 0, 3988, 4097, 0, 4624, 1, 0, 10128, 2415919104, 0, 10128, 2415919104, 0, 13328, 36, 0, 13328, 36, 0, 13344, 36, 0, 13344, 36, 0, 14228, 613548032, 0, 14228, 613548032, 0, 14228, 613548032, 0, 14228, 613548032, 0, 14228, 613548032, 0, 14232, 613548032, 0, 14232, 613548032, 0, 14232, 613548032, 0, 14232, 613548032, 0, 14232, 613548032, 0, 14236, 613548032, 0, 14236, 613548032, 0, 14236, 613548032, 0, 14236, 613548032, 0, 14236, 613548032, 0, 14244, 613548032, 0, 14244, 613548032, 0, 14244, 613548032, 0, 14244, 613548032, 0, 14244, 613548032, 0, 14248, 613548032, 0, 14248, 613548032, 0, 14248, 613548032, 0, 14248, 613548032, 0, 14248, 613548032, 0, 14252, 613548032, 0, 14252, 613548032, 0, 14252, 613548032, 0, 14252, 613548032, 0, 14252, 613548032, 0, 15444, 36, 0, 15444, 36, 0, 15448, 36, 0, 15448, 36, 0, 15452, 36, 0, 15452, 36, 0, 15460, 36, 0, 15460, 36, 0, 15464, 36, 0, 15464, 36, 0, 
15468, 36, 0, 15468, 36, 0, 15892, 292, 0, 15892, 292, 0, 15892, 292, 0, 15896, 292, 0, 15896, 292, 0, 15896, 292, 0, 15900, 292, 0, 15900, 292, 0, 15900, 292, 0, 15908, 292, 0, 15908, 292, 0, 15908, 292, 0, 15912, 292, 0, 15912, 292, 0, 15912, 292, 0, 15916, 292, 0, 15916, 292, 0, 15916, 292, 0, 1232, 1, 0, 2772, 2097152, 0, 3988, 4097, 0, 3988, 4097, 0, 4624, 1, 0, 10128, 2415919104, 0, 10128, 2415919104, 0, 13328, 36, 0, 13328, 36, 0, 13344, 36, 0, 13344, 36, 0, 14228, 613548032, 0, 14228, 613548032, 0, 14228, 613548032, 0, 14228, 613548032, 0, 14228, 613548032, 0, 14232, 613548032, 0, 14232, 613548032, 0, 14232, 613548032, 0, 14232, 613548032, 0, 14232, 613548032, 0, 14236, 613548032, 0, 14236, 613548032, 0, 14236, 613548032, 0, 14236, 613548032, 0, 14236, 613548032, 0, 14244, 613548032, 0, 14244, 613548032, 0, 14244, 613548032, 0, 14244, 613548032, 0, 14244, 613548032, 0, 14248, 613548032, 0, 14248, 613548032, 0, 14248, 613548032, 0, 14248, 613548032, 0, 14248, 613548032, 0, 14252, 613548032, 0, 14252, 613548032, 0, 14252, 613548032, 0, 14252, 613548032, 0, 14252, 613548032, 0, 15444, 36, 0, 15444, 36, 0, 15448, 36, 0, 15448, 36, 0, 15452, 36, 0, 15452, 36, 0, 15460, 36, 0, 15460, 36, 0, 15464, 36, 0, 15464, 36, 0, 15468, 36, 0, 15468, 36, 0, 15892, 292, 0, 15892, 292, 0, 15892, 292, 0, 15896, 292, 0, 15896, 292, 0, 15896, 292, 0, 15900, 292, 0, 15900, 292, 0, 15900, 292, 0, 15908, 292, 0, 15908, 292, 0, 15908, 292, 0, 15912, 292, 0, 15912, 292, 0, 15912, 292, 0, 15916, 292, 0, 15916, 292, 0, 15916, 292, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 
+ Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756376234609337114_300_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756376234609337114_300_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3badc664 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756376234609337114_300_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,123 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((counter1 == 2)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1512 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 4026531843, 0, 1152, 4026531843, 0, 1152, 4026531843, 0, 1152, 4026531843, 0, 1152, 4026531843, 0, 1152, 4026531843, 0, 1168, 4026531843, 0, 1168, 4026531843, 0, 1168, 4026531843, 0, 1168, 4026531843, 0, 1168, 4026531843, 0, 1168, 4026531843, 0, 1184, 4026531843, 0, 1184, 4026531843, 0, 1184, 4026531843, 0, 1184, 4026531843, 0, 1184, 4026531843, 0, 1184, 4026531843, 0, 2244, 17, 0, 2244, 17, 0, 2248, 17, 0, 2248, 17, 0, 2260, 17, 0, 2260, 17, 0, 2264, 17, 0, 2264, 17, 0, 2276, 17, 0, 2276, 17, 0, 2280, 17, 0, 2280, 17, 0, 2820, 286331153, 0, 2820, 286331153, 0, 2820, 286331153, 0, 2820, 286331153, 0, 2820, 286331153, 0, 2820, 286331153, 0, 2820, 286331153, 0, 2820, 286331153, 0, 2824, 286331153, 0, 2824, 286331153, 0, 2824, 286331153, 0, 2824, 286331153, 0, 2824, 286331153, 0, 2824, 286331153, 0, 2824, 286331153, 0, 2824, 286331153, 0, 2836, 286331153, 0, 2836, 286331153, 
0, 2836, 286331153, 0, 2836, 286331153, 0, 2836, 286331153, 0, 2836, 286331153, 0, 2836, 286331153, 0, 2836, 286331153, 0, 2840, 286331153, 0, 2840, 286331153, 0, 2840, 286331153, 0, 2840, 286331153, 0, 2840, 286331153, 0, 2840, 286331153, 0, 2840, 286331153, 0, 2840, 286331153, 0, 2852, 286331153, 0, 2852, 286331153, 0, 2852, 286331153, 0, 2852, 286331153, 0, 2852, 286331153, 0, 2852, 286331153, 0, 2852, 286331153, 0, 2852, 286331153, 0, 2856, 286331153, 0, 2856, 286331153, 0, 2856, 286331153, 0, 2856, 286331153, 0, 2856, 286331153, 0, 2856, 286331153, 0, 2856, 286331153, 0, 2856, 286331153, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 
2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3588, 559240, 0, 3588, 559240, 0, 3588, 559240, 0, 3588, 559240, 0, 3588, 559240, 0, 3592, 559240, 0, 3592, 559240, 0, 3592, 559240, 0, 3592, 559240, 0, 3592, 559240, 0, 3604, 559240, 0, 3604, 559240, 0, 3604, 559240, 0, 3604, 559240, 0, 3604, 559240, 0, 3608, 559240, 0, 3608, 559240, 0, 3608, 559240, 0, 3608, 559240, 0, 3608, 559240, 0, 3620, 559240, 0, 3620, 
559240, 0, 3620, 559240, 0, 3620, 559240, 0, 3620, 559240, 0, 3624, 559240, 0, 3624, 559240, 0, 3624, 559240, 0, 3624, 559240, 0, 3624, 559240, 0, 1152, 4026531843, 0, 1152, 4026531843, 0, 1152, 4026531843, 0, 1152, 4026531843, 0, 1152, 4026531843, 0, 1152, 4026531843, 0, 1168, 4026531843, 0, 1168, 4026531843, 0, 1168, 4026531843, 0, 1168, 4026531843, 0, 1168, 4026531843, 0, 1168, 4026531843, 0, 1184, 4026531843, 0, 1184, 4026531843, 0, 1184, 4026531843, 0, 1184, 4026531843, 0, 1184, 4026531843, 0, 1184, 4026531843, 0, 2244, 17, 0, 2244, 17, 0, 2248, 17, 0, 2248, 17, 0, 2260, 17, 0, 2260, 17, 0, 2264, 17, 0, 2264, 17, 0, 2276, 17, 0, 2276, 17, 0, 2280, 17, 0, 2280, 17, 0, 2820, 286331153, 0, 2820, 286331153, 0, 2820, 286331153, 0, 2820, 286331153, 0, 2820, 286331153, 0, 2820, 286331153, 0, 2820, 286331153, 0, 2820, 286331153, 0, 2824, 286331153, 0, 2824, 286331153, 0, 2824, 286331153, 0, 2824, 286331153, 0, 2824, 286331153, 0, 2824, 286331153, 0, 2824, 286331153, 0, 2824, 286331153, 0, 2836, 286331153, 0, 2836, 286331153, 0, 2836, 286331153, 0, 2836, 286331153, 0, 2836, 286331153, 0, 2836, 286331153, 0, 2836, 286331153, 0, 2836, 286331153, 0, 2840, 286331153, 0, 2840, 286331153, 0, 2840, 286331153, 0, 2840, 286331153, 0, 2840, 286331153, 0, 2840, 286331153, 0, 2840, 286331153, 0, 2840, 286331153, 0, 2852, 286331153, 0, 2852, 286331153, 0, 2852, 286331153, 0, 2852, 286331153, 0, 2852, 286331153, 0, 2852, 286331153, 0, 2852, 286331153, 0, 2852, 286331153, 0, 2856, 286331153, 0, 2856, 286331153, 0, 2856, 286331153, 0, 2856, 286331153, 0, 2856, 286331153, 0, 2856, 286331153, 0, 2856, 286331153, 0, 2856, 286331153, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 
3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3140, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3144, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3156, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3160, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 
3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3172, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3176, 2004318071, 0, 3588, 559240, 0, 3588, 559240, 0, 3588, 559240, 0, 3588, 559240, 0, 3588, 559240, 0, 3592, 559240, 0, 3592, 559240, 0, 3592, 559240, 0, 3592, 559240, 0, 3592, 559240, 0, 3604, 559240, 0, 3604, 559240, 0, 3604, 559240, 0, 3604, 559240, 0, 3604, 559240, 0, 3608, 559240, 0, 3608, 559240, 0, 3608, 559240, 0, 3608, 559240, 0, 3608, 559240, 0, 3620, 559240, 0, 3620, 559240, 0, 3620, 559240, 0, 3620, 559240, 0, 3620, 559240, 0, 3624, 559240, 0, 3624, 559240, 0, 3624, 559240, 0, 3624, 559240, 0, 3624, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756376399994274031_303_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756376399994274031_303_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a540973f --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756376399994274031_303_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,225 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 414 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1664, 31, 0, 1664, 31, 0, 1664, 31, 0, 
1664, 31, 0, 1664, 31, 0, 1280, 2147483648, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 2304, 17, 0, 2304, 17, 0, 3840, 537002016, 0, 3840, 537002016, 0, 3840, 537002016, 0, 4160, 1145324612, 0, 4160, 1145324612, 0, 4160, 1145324612, 0, 4160, 1145324612, 0, 4160, 1145324612, 0, 4160, 1145324612, 0, 4160, 1145324612, 0, 4160, 1145324612, 0, 5312, 2147483656, 0, 5312, 2147483656, 0, 7488, 2281701384, 0, 7488, 2281701384, 0, 7488, 2281701384, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 1664, 31, 0, 1664, 31, 0, 1664, 31, 0, 1664, 31, 0, 1664, 31, 0, 1280, 2147483648, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 896, 715827872, 0, 2304, 17, 0, 2304, 17, 0, 3840, 537002016, 0, 3840, 537002016, 0, 3840, 537002016, 0, 4160, 1145324612, 0, 4160, 1145324612, 0, 4160, 1145324612, 0, 4160, 1145324612, 0, 4160, 1145324612, 0, 4160, 1145324612, 0, 4160, 1145324612, 0, 4160, 1145324612, 0, 5312, 2147483656, 0, 5312, 2147483656, 0, 7488, 2281701384, 0, 7488, 2281701384, 0, 
7488, 2281701384, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8528, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0, 8544, 2863311530, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756376403345073263_304_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756376403345073263_304_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..63c89412 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756376403345073263_304_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,112 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 768, 65, 0, 768, 65, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756376403619685519_305_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756376403619685519_305_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..97b636eb --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756376403619685519_305_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,125 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 3758096391, 0, 1088, 3758096391, 0, 1088, 3758096391, 0, 1088, 3758096391, 0, 1088, 3758096391, 0, 1088, 3758096391, 0, 1728, 9, 0, 1728, 9, 0, 2624, 536870916, 0, 2624, 536870916, 0, 3584, 3758096399, 0, 3584, 3758096399, 0, 3584, 3758096399, 0, 3584, 3758096399, 0, 3584, 3758096399, 0, 3584, 3758096399, 0, 3584, 3758096399, 0, 1088, 3758096391, 0, 1088, 3758096391, 0, 1088, 3758096391, 0, 1088, 3758096391, 0, 1088, 3758096391, 0, 1088, 3758096391, 0, 1728, 9, 0, 1728, 9, 0, 2624, 536870916, 0, 2624, 536870916, 0, 3584, 3758096399, 0, 3584, 3758096399, 0, 3584, 3758096399, 0, 3584, 3758096399, 0, 3584, 3758096399, 0, 3584, 3758096399, 0, 3584, 3758096399, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756376454882359053_308_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756376454882359053_308_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2550f285 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756376454882359053_308_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,243 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 22))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 
1: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 25))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 11)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3584, 2415919106, 0, 3584, 2415919106, 0, 3584, 2415919106, 0, 6336, 16, 0, 8320, 36, 0, 8320, 36, 0, 8336, 36, 0, 8336, 36, 0, 8352, 36, 0, 8352, 36, 0, 9856, 545392672, 0, 9856, 545392672, 0, 9856, 545392672, 0, 9856, 545392672, 0, 9856, 545392672, 0, 9872, 545392672, 0, 9872, 545392672, 0, 9872, 545392672, 0, 9872, 545392672, 0, 9872, 545392672, 0, 9888, 545392672, 0, 9888, 545392672, 0, 9888, 545392672, 0, 9888, 545392672, 0, 9888, 545392672, 0, 10688, 73, 0, 10688, 73, 0, 10688, 73, 0, 11264, 272696336, 0, 11264, 272696336, 0, 11264, 272696336, 0, 11264, 272696336, 0, 11264, 272696336, 0, 11584, 613566756, 0, 11584, 613566756, 0, 11584, 613566756, 0, 11584, 613566756, 0, 11584, 613566756, 0, 11584, 613566756, 0, 11584, 613566756, 0, 11584, 613566756, 0, 11584, 613566756, 0, 11584, 613566756, 0, 3584, 2415919106, 0, 3584, 2415919106, 0, 3584, 2415919106, 0, 6336, 16, 0, 8320, 36, 0, 8320, 36, 0, 8336, 36, 0, 
8336, 36, 0, 8352, 36, 0, 8352, 36, 0, 9856, 545392672, 0, 9856, 545392672, 0, 9856, 545392672, 0, 9856, 545392672, 0, 9856, 545392672, 0, 9872, 545392672, 0, 9872, 545392672, 0, 9872, 545392672, 0, 9872, 545392672, 0, 9872, 545392672, 0, 9888, 545392672, 0, 9888, 545392672, 0, 9888, 545392672, 0, 9888, 545392672, 0, 9888, 545392672, 0, 10688, 73, 0, 10688, 73, 0, 10688, 73, 0, 11264, 272696336, 0, 11264, 272696336, 0, 11264, 272696336, 0, 11264, 272696336, 0, 11264, 272696336, 0, 11584, 613566756, 0, 11584, 613566756, 0, 11584, 613566756, 0, 11584, 613566756, 0, 11584, 613566756, 0, 11584, 613566756, 0, 11584, 613566756, 0, 11584, 613566756, 0, 11584, 613566756, 0, 11584, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756376475793908019_310_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756376475793908019_310_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..442523b3 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756376475793908019_310_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,267 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 29))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 
3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 28)) { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - 
Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 342 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2880, 256, 0, 2896, 256, 0, 7872, 256, 0, 7888, 256, 0, 9344, 16, 0, 10512, 285212673, 0, 10512, 285212673, 0, 10512, 285212673, 0, 10528, 285212673, 0, 10528, 285212673, 0, 10528, 285212673, 0, 11280, 268435456, 0, 11296, 268435456, 0, 11728, 268435456, 0, 11744, 268435456, 0, 12432, 268435473, 0, 12432, 268435473, 0, 12432, 268435473, 0, 12448, 268435473, 0, 12448, 268435473, 0, 12448, 268435473, 0, 13312, 1145324612, 0, 13312, 1145324612, 0, 13312, 1145324612, 0, 13312, 1145324612, 0, 13312, 1145324612, 0, 13312, 1145324612, 0, 13312, 1145324612, 0, 13312, 1145324612, 0, 13760, 838860, 0, 13760, 838860, 0, 13760, 838860, 0, 13760, 838860, 0, 13760, 838860, 0, 13760, 838860, 0, 13760, 838860, 0, 13760, 838860, 0, 13760, 838860, 0, 13760, 838860, 0, 14400, 73, 0, 14400, 73, 0, 14400, 73, 0, 14976, 272696336, 0, 14976, 272696336, 0, 14976, 272696336, 0, 14976, 272696336, 0, 14976, 272696336, 0, 15296, 613566756, 0, 15296, 613566756, 0, 15296, 613566756, 0, 15296, 613566756, 0, 15296, 613566756, 0, 15296, 613566756, 0, 15296, 613566756, 0, 15296, 613566756, 0, 15296, 613566756, 0, 15296, 613566756, 0, 2880, 256, 0, 2896, 256, 0, 7872, 256, 0, 7888, 256, 0, 9344, 16, 0, 10512, 285212673, 0, 10512, 285212673, 0, 10512, 285212673, 0, 10528, 285212673, 0, 10528, 285212673, 0, 10528, 285212673, 0, 11280, 268435456, 0, 11296, 268435456, 0, 11728, 268435456, 0, 11744, 268435456, 0, 12432, 268435473, 0, 12432, 268435473, 0, 12432, 268435473, 0, 12448, 268435473, 0, 12448, 268435473, 0, 12448, 268435473, 0, 13312, 1145324612, 0, 13312, 1145324612, 0, 13312, 1145324612, 0, 13312, 1145324612, 0, 13312, 1145324612, 0, 13312, 1145324612, 0, 13312, 1145324612, 0, 13312, 1145324612, 0, 13760, 838860, 0, 13760, 838860, 0, 13760, 838860, 0, 
13760, 838860, 0, 13760, 838860, 0, 13760, 838860, 0, 13760, 838860, 0, 13760, 838860, 0, 13760, 838860, 0, 13760, 838860, 0, 14400, 73, 0, 14400, 73, 0, 14400, 73, 0, 14976, 272696336, 0, 14976, 272696336, 0, 14976, 272696336, 0, 14976, 272696336, 0, 14976, 272696336, 0, 15296, 613566756, 0, 15296, 613566756, 0, 15296, 613566756, 0, 15296, 613566756, 0, 15296, 613566756, 0, 15296, 613566756, 0, 15296, 613566756, 0, 15296, 613566756, 0, 15296, 613566756, 0, 15296, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756376580457999268_314_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756376580457999268_314_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b72754dc --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756376580457999268_314_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,185 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 21)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((127 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 270 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 536870920, 0, 1152, 536870920, 0, 1168, 536870920, 0, 1168, 536870920, 0, 1184, 536870920, 0, 1184, 536870920, 0, 3968, 2101248, 0, 3968, 2101248, 0, 3984, 2101248, 0, 3984, 2101248, 0, 4000, 2101248, 0, 4000, 2101248, 0, 4608, 17, 0, 4608, 17, 0, 8144, 33554434, 0, 8144, 33554434, 0, 8148, 33554434, 0, 8148, 33554434, 0, 8152, 33554434, 0, 8152, 33554434, 0, 8160, 33554434, 0, 8160, 33554434, 0, 8164, 33554434, 0, 8164, 33554434, 0, 8168, 33554434, 0, 8168, 33554434, 0, 8176, 33554434, 0, 8176, 33554434, 0, 8180, 33554434, 0, 8180, 33554434, 0, 8184, 33554434, 0, 8184, 33554434, 0, 8448, 1145324612, 0, 8448, 1145324612, 0, 8448, 1145324612, 0, 8448, 1145324612, 0, 8448, 1145324612, 0, 8448, 1145324612, 0, 8448, 1145324612, 0, 8448, 1145324612, 0, 8896, 559240, 0, 8896, 559240, 0, 8896, 559240, 0, 8896, 559240, 0, 8896, 559240, 0, 1152, 536870920, 0, 1152, 536870920, 0, 1168, 536870920, 0, 1168, 536870920, 0, 1184, 536870920, 0, 1184, 536870920, 0, 3968, 2101248, 0, 3968, 2101248, 0, 3984, 2101248, 0, 3984, 2101248, 0, 4000, 2101248, 0, 4000, 2101248, 0, 4608, 17, 0, 4608, 17, 0, 8144, 33554434, 0, 8144, 33554434, 0, 8148, 33554434, 0, 8148, 33554434, 0, 8152, 33554434, 0, 8152, 33554434, 0, 8160, 33554434, 0, 8160, 33554434, 0, 8164, 33554434, 0, 8164, 33554434, 0, 8168, 33554434, 0, 8168, 33554434, 0, 8176, 33554434, 0, 8176, 33554434, 0, 8180, 33554434, 0, 8180, 33554434, 0, 8184, 33554434, 0, 8184, 33554434, 0, 8448, 1145324612, 0, 8448, 1145324612, 0, 8448, 1145324612, 0, 8448, 1145324612, 0, 8448, 1145324612, 0, 8448, 1145324612, 0, 8448, 1145324612, 0, 8448, 1145324612, 0, 8896, 559240, 0, 8896, 559240, 0, 
8896, 559240, 0, 8896, 559240, 0, 8896, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756376589106242812_315_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756376589106242812_315_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f3829331 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756376589106242812_315_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,181 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 27))) { + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 
<< 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + 
} + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5888, 268435456, 0, 5904, 268435456, 0, 6976, 8, 0, 8064, 2181570690, 0, 8064, 2181570690, 0, 8064, 2181570690, 0, 8064, 2181570690, 0, 8064, 2181570690, 0, 8064, 2181570690, 0, 8080, 2181570690, 0, 8080, 2181570690, 0, 8080, 2181570690, 0, 8080, 2181570690, 0, 8080, 2181570690, 0, 8080, 2181570690, 0, 9728, 131072, 0, 9744, 131072, 0, 9760, 131072, 0, 5888, 268435456, 0, 5904, 268435456, 0, 6976, 8, 0, 
8064, 2181570690, 0, 8064, 2181570690, 0, 8064, 2181570690, 0, 8064, 2181570690, 0, 8064, 2181570690, 0, 8064, 2181570690, 0, 8080, 2181570690, 0, 8080, 2181570690, 0, 8080, 2181570690, 0, 8080, 2181570690, 0, 8080, 2181570690, 0, 8080, 2181570690, 0, 9728, 131072, 0, 9744, 131072, 0, 9760, 131072, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756376673500976094_317_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756376673500976094_317_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0b781cd4 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756376673500976094_317_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,255 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 9))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result 
= (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = 
(counter2 + 1); + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + 
Stride: 4 + Fill: 0 + Size: 306 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2624, 1024, 0, 3988, 128, 0, 3992, 128, 0, 3996, 128, 0, 4004, 128, 0, 4008, 128, 0, 4012, 128, 0, 5440, 33554432, 0, 6080, 85, 0, 6080, 85, 0, 6080, 85, 0, 6080, 85, 0, 8960, 559240, 0, 8960, 559240, 0, 8960, 559240, 0, 8960, 559240, 0, 8960, 559240, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 13120, 73, 0, 13120, 73, 0, 13120, 73, 0, 13696, 272696336, 0, 13696, 272696336, 0, 13696, 272696336, 0, 13696, 272696336, 0, 13696, 272696336, 0, 14016, 613566756, 0, 14016, 613566756, 0, 14016, 613566756, 0, 14016, 613566756, 0, 14016, 613566756, 0, 14016, 613566756, 0, 14016, 613566756, 0, 14016, 613566756, 0, 14016, 613566756, 0, 14016, 613566756, 0, 2624, 1024, 0, 3988, 128, 0, 3992, 128, 0, 3996, 128, 0, 4004, 128, 0, 4008, 128, 0, 4012, 128, 0, 5440, 33554432, 0, 6080, 85, 0, 6080, 85, 0, 6080, 85, 0, 6080, 85, 0, 8960, 559240, 0, 8960, 559240, 0, 8960, 559240, 0, 8960, 559240, 0, 8960, 559240, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 9536, 2863311530, 0, 13120, 73, 0, 13120, 73, 0, 13120, 73, 0, 13696, 272696336, 0, 13696, 272696336, 0, 13696, 272696336, 0, 13696, 272696336, 0, 13696, 272696336, 0, 14016, 613566756, 0, 14016, 613566756, 0, 14016, 613566756, 0, 14016, 613566756, 0, 14016, 613566756, 0, 14016, 613566756, 0, 14016, 613566756, 0, 14016, 613566756, 0, 14016, 613566756, 0, 14016, 613566756, 0] + 
- Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756376687441571122_319_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756376687441571122_319_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b46d900b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756376687441571122_319_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = 
(result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756377114834438689_321_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756377114834438689_321_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..35f88696 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756377114834438689_321_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,165 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 27))) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 1600, 17, 0, 1600, 17, 0, 7104, 4, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 960, 1431655765, 0, 1600, 17, 0, 1600, 17, 0, 7104, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756377322323415260_325_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756377322323415260_325_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5c0d94f2 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756377322323415260_325_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,244 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26))) { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((83 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 27))) { + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 29))) { + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28))) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 27))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((215 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((246 << 6) | (i3 << 4)) | (i4 
<< 2)) | i5); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((271 << 6) | (i3 << 4)) | (i4 << 2)) | i5); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((282 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (292 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (301 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1074 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 
73, 0, 576, 73, 0, 576, 73, 0, 2048, 524304, 0, 2048, 524304, 0, 2064, 524304, 0, 2064, 524304, 0, 2080, 524304, 0, 2080, 524304, 0, 3076, 2181570690, 0, 3076, 2181570690, 0, 3076, 2181570690, 0, 3076, 2181570690, 0, 3076, 2181570690, 0, 3076, 2181570690, 0, 3080, 2181570690, 0, 3080, 2181570690, 0, 3080, 2181570690, 0, 3080, 2181570690, 0, 3080, 2181570690, 0, 3080, 2181570690, 0, 3092, 2181570690, 0, 3092, 2181570690, 0, 3092, 2181570690, 0, 3092, 2181570690, 0, 3092, 2181570690, 0, 3092, 2181570690, 0, 3096, 2181570690, 0, 3096, 2181570690, 0, 3096, 2181570690, 0, 3096, 2181570690, 0, 3096, 2181570690, 0, 3096, 2181570690, 0, 3108, 2181570690, 0, 3108, 2181570690, 0, 3108, 2181570690, 0, 3108, 2181570690, 0, 3108, 2181570690, 0, 3108, 2181570690, 0, 3112, 2181570690, 0, 3112, 2181570690, 0, 3112, 2181570690, 0, 3112, 2181570690, 0, 3112, 2181570690, 0, 3112, 2181570690, 0, 5316, 272696336, 0, 5316, 272696336, 0, 5316, 272696336, 0, 5316, 272696336, 0, 5316, 272696336, 0, 5320, 272696336, 0, 5320, 272696336, 0, 5320, 272696336, 0, 5320, 272696336, 0, 5320, 272696336, 0, 5332, 272696336, 0, 5332, 272696336, 0, 5332, 272696336, 0, 5332, 272696336, 0, 5332, 272696336, 0, 5336, 272696336, 0, 5336, 272696336, 0, 5336, 272696336, 0, 5336, 272696336, 0, 5336, 272696336, 0, 5348, 272696336, 0, 5348, 272696336, 0, 5348, 272696336, 0, 5348, 272696336, 0, 5348, 272696336, 0, 5352, 272696336, 0, 5352, 272696336, 0, 5352, 272696336, 0, 5352, 272696336, 0, 5352, 272696336, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 
10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 11776, 4160749569, 0, 11776, 4160749569, 0, 11776, 4160749569, 0, 11776, 4160749569, 0, 11776, 4160749569, 0, 11776, 4160749569, 0, 11792, 4160749569, 0, 11792, 4160749569, 0, 11792, 4160749569, 0, 11792, 4160749569, 0, 11792, 4160749569, 0, 11792, 4160749569, 0, 15744, 2147483648, 0, 15745, 2147483648, 0, 15746, 2147483648, 0, 15748, 2147483648, 0, 15749, 2147483648, 0, 15750, 2147483648, 0, 15760, 2147483648, 0, 15761, 2147483648, 0, 15762, 2147483648, 0, 15764, 2147483648, 0, 15765, 2147483648, 0, 15766, 2147483648, 0, 17344, 1, 0, 17345, 1, 0, 17346, 1, 0, 17348, 1, 0, 17349, 1, 0, 17350, 1, 0, 17360, 1, 0, 17361, 1, 0, 17362, 1, 0, 17364, 1, 0, 17365, 1, 0, 17366, 1, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18688, 85, 0, 18688, 85, 0, 18688, 85, 0, 18688, 85, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 2048, 524304, 0, 2048, 524304, 0, 2064, 524304, 0, 2064, 524304, 0, 2080, 524304, 0, 2080, 524304, 0, 3076, 2181570690, 0, 3076, 2181570690, 0, 3076, 2181570690, 0, 3076, 2181570690, 0, 3076, 2181570690, 0, 3076, 2181570690, 0, 3080, 2181570690, 0, 3080, 2181570690, 0, 3080, 2181570690, 0, 3080, 2181570690, 0, 3080, 
2181570690, 0, 3080, 2181570690, 0, 3092, 2181570690, 0, 3092, 2181570690, 0, 3092, 2181570690, 0, 3092, 2181570690, 0, 3092, 2181570690, 0, 3092, 2181570690, 0, 3096, 2181570690, 0, 3096, 2181570690, 0, 3096, 2181570690, 0, 3096, 2181570690, 0, 3096, 2181570690, 0, 3096, 2181570690, 0, 3108, 2181570690, 0, 3108, 2181570690, 0, 3108, 2181570690, 0, 3108, 2181570690, 0, 3108, 2181570690, 0, 3108, 2181570690, 0, 3112, 2181570690, 0, 3112, 2181570690, 0, 3112, 2181570690, 0, 3112, 2181570690, 0, 3112, 2181570690, 0, 3112, 2181570690, 0, 5316, 272696336, 0, 5316, 272696336, 0, 5316, 272696336, 0, 5316, 272696336, 0, 5316, 272696336, 0, 5320, 272696336, 0, 5320, 272696336, 0, 5320, 272696336, 0, 5320, 272696336, 0, 5320, 272696336, 0, 5332, 272696336, 0, 5332, 272696336, 0, 5332, 272696336, 0, 5332, 272696336, 0, 5332, 272696336, 0, 5336, 272696336, 0, 5336, 272696336, 0, 5336, 272696336, 0, 5336, 272696336, 0, 5336, 272696336, 0, 5348, 272696336, 0, 5348, 272696336, 0, 5348, 272696336, 0, 5348, 272696336, 0, 5348, 272696336, 0, 5352, 272696336, 0, 5352, 272696336, 0, 5352, 272696336, 0, 5352, 272696336, 0, 5352, 272696336, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10624, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 10640, 1431655765, 0, 11776, 4160749569, 0, 11776, 4160749569, 0, 11776, 4160749569, 0, 11776, 4160749569, 0, 11776, 4160749569, 0, 11776, 4160749569, 0, 11792, 4160749569, 0, 
11792, 4160749569, 0, 11792, 4160749569, 0, 11792, 4160749569, 0, 11792, 4160749569, 0, 11792, 4160749569, 0, 15744, 2147483648, 0, 15745, 2147483648, 0, 15746, 2147483648, 0, 15748, 2147483648, 0, 15749, 2147483648, 0, 15750, 2147483648, 0, 15760, 2147483648, 0, 15761, 2147483648, 0, 15762, 2147483648, 0, 15764, 2147483648, 0, 15765, 2147483648, 0, 15766, 2147483648, 0, 17344, 1, 0, 17345, 1, 0, 17346, 1, 0, 17348, 1, 0, 17349, 1, 0, 17350, 1, 0, 17360, 1, 0, 17361, 1, 0, 17362, 1, 0, 17364, 1, 0, 17365, 1, 0, 17366, 1, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18048, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18064, 1431655765, 0, 18688, 85, 0, 18688, 85, 0, 18688, 85, 0, 18688, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756377466790696832_328_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756377466790696832_328_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..63c89412 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756377466790696832_328_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,112 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 768, 65, 0, 768, 65, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756377477682557257_330_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756377477682557257_330_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..09c79978 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756377477682557257_330_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,309 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 17)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 16)) { + result = 
(result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((161 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((180 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= ((202 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((243 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 588 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 5072, 2147483648, 0, 5088, 2147483648, 0, 6416, 272696336, 0, 6416, 272696336, 0, 6416, 272696336, 0, 6416, 272696336, 0, 6416, 272696336, 0, 6432, 272696336, 0, 6432, 272696336, 0, 6432, 272696336, 0, 6432, 272696336, 0, 6432, 272696336, 0, 6864, 9362, 0, 6864, 9362, 0, 6864, 9362, 0, 6864, 9362, 0, 6864, 9362, 0, 6880, 9362, 0, 6880, 9362, 0, 6880, 9362, 0, 6880, 9362, 0, 6880, 9362, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 
14992, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 16592, 341, 0, 16592, 341, 0, 16592, 341, 0, 16592, 341, 0, 16592, 341, 0, 16608, 341, 0, 16608, 341, 0, 16608, 341, 0, 16608, 341, 0, 16608, 341, 0, 16624, 341, 0, 16624, 341, 0, 16624, 341, 0, 16624, 341, 0, 16624, 341, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 5072, 2147483648, 0, 5088, 2147483648, 0, 6416, 272696336, 0, 6416, 272696336, 0, 6416, 272696336, 0, 6416, 272696336, 0, 6416, 272696336, 0, 6432, 272696336, 0, 6432, 272696336, 0, 6432, 272696336, 0, 6432, 272696336, 0, 6432, 272696336, 0, 6864, 9362, 0, 6864, 9362, 0, 6864, 9362, 0, 6864, 9362, 0, 6864, 9362, 0, 6880, 9362, 0, 6880, 9362, 0, 6880, 9362, 0, 6880, 9362, 0, 6880, 9362, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 13440, 613566756, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 14992, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 
1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15008, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 15024, 1431655765, 0, 16592, 341, 0, 16592, 341, 0, 16592, 341, 0, 16592, 341, 0, 16592, 341, 0, 16608, 341, 0, 16608, 341, 0, 16608, 341, 0, 16608, 341, 0, 16608, 341, 0, 16624, 341, 0, 16624, 341, 0, 16624, 341, 0, 16624, 341, 0, 16624, 341, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756377485768030102_331_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756377485768030102_331_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..44272c8b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756377485768030102_331_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: 
UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 1792, 67371072, 0, 1792, 67371072, 0, 1792, 67371072, 0, 1536, 1364262912, 0, 1536, 1364262912, 0, 1536, 1364262912, 0, 1536, 1364262912, 0, 1536, 1364262912, 0, 1536, 1364262912, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 2176, 2863311530, 0, 1792, 67371072, 0, 1792, 67371072, 0, 1792, 67371072, 0, 1536, 1364262912, 0, 1536, 1364262912, 0, 1536, 1364262912, 0, 1536, 1364262912, 0, 1536, 1364262912, 0, 1536, 1364262912, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756377485967403426_332_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756377485967403426_332_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..25a07188 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756377485967403426_332_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,273 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 23))) { + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 22)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 23)) { + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((167 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((174 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 30))) { + if ((WaveGetLaneIndex() == 27)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 
64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 396 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 7168, 536879136, 0, 7168, 536879136, 0, 7168, 536879136, 0, 7488, 1145324612, 0, 7488, 1145324612, 0, 7488, 1145324612, 0, 7488, 1145324612, 0, 7488, 1145324612, 0, 7488, 1145324612, 0, 7488, 1145324612, 0, 7488, 1145324612, 0, 7936, 559240, 0, 7936, 559240, 0, 7936, 559240, 0, 7936, 559240, 0, 7936, 559240, 0, 8768, 1224736768, 0, 8768, 1224736768, 0, 8768, 1224736768, 0, 10688, 1073741824, 0, 10692, 1073741824, 0, 10704, 1073741824, 0, 10708, 1073741824, 0, 11136, 1224736768, 0, 11136, 1224736768, 0, 11136, 1224736768, 0, 11140, 1224736768, 0, 11140, 1224736768, 0, 11140, 1224736768, 0, 11152, 1224736768, 0, 11152, 1224736768, 0, 11152, 1224736768, 0, 11156, 1224736768, 0, 11156, 1224736768, 0, 11156, 1224736768, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 15296, 524800, 0, 15296, 524800, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 576, 17, 0, 576, 17, 0, 7168, 536879136, 0, 7168, 536879136, 0, 7168, 536879136, 0, 7488, 1145324612, 0, 7488, 1145324612, 0, 7488, 1145324612, 0, 7488, 1145324612, 0, 7488, 1145324612, 0, 7488, 1145324612, 0, 7488, 1145324612, 0, 7488, 1145324612, 0, 7936, 559240, 0, 7936, 559240, 0, 7936, 559240, 0, 7936, 559240, 0, 7936, 559240, 0, 8768, 1224736768, 0, 8768, 1224736768, 0, 8768, 1224736768, 0, 10688, 1073741824, 0, 10692, 1073741824, 0, 
10704, 1073741824, 0, 10708, 1073741824, 0, 11136, 1224736768, 0, 11136, 1224736768, 0, 11136, 1224736768, 0, 11140, 1224736768, 0, 11140, 1224736768, 0, 11140, 1224736768, 0, 11152, 1224736768, 0, 11152, 1224736768, 0, 11152, 1224736768, 0, 11156, 1224736768, 0, 11156, 1224736768, 0, 11156, 1224736768, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 12352, 1363481681, 0, 15296, 524800, 0, 15296, 524800, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0, 15040, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756377490004110512_333_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756377490004110512_333_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..649cd5be --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756377490004110512_333_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,125 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 318 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 3712, 16842752, 0, 3712, 16842752, 0, 4416, 840957953, 0, 4416, 840957953, 0, 4416, 840957953, 0, 4416, 840957953, 0, 4416, 840957953, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 
2004318071, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 576, 17, 0, 576, 17, 0, 3712, 16842752, 0, 3712, 16842752, 0, 4416, 840957953, 0, 4416, 840957953, 0, 4416, 840957953, 0, 4416, 840957953, 0, 4416, 840957953, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 4736, 2004318071, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0, 5184, 1048575, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756377490678983350_334_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756377490678983350_334_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3624eedb --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756377490678983350_334_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,307 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((80 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((90 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((99 
<< 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((104 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((119 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() 
& 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((169 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 11)) { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((179 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((186 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((195 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((204 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 2208 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1344, 268501008, 0, 2880, 1048832, 0, 2880, 1048832, 0, 3840, 33554432, 0, 3856, 33554432, 0, 5124, 838860819, 0, 5124, 838860819, 0, 5124, 838860819, 0, 5124, 838860819, 0, 5124, 838860819, 0, 5124, 838860819, 0, 5128, 838860819, 0, 5128, 838860819, 0, 5128, 838860819, 0, 5128, 838860819, 0, 5128, 838860819, 0, 5128, 838860819, 0, 5140, 838860819, 0, 5140, 838860819, 0, 5140, 838860819, 0, 5140, 838860819, 0, 5140, 838860819, 0, 5140, 838860819, 0, 5144, 838860819, 0, 5144, 838860819, 0, 5144, 838860819, 0, 5144, 838860819, 0, 5144, 838860819, 0, 5144, 838860819, 0, 5764, 1, 0, 5768, 1, 0, 5780, 1, 0, 5784, 1, 0, 6340, 285282321, 0, 6340, 285282321, 0, 6340, 285282321, 0, 6340, 285282321, 0, 6340, 285282321, 0, 6340, 285282321, 0, 6344, 285282321, 0, 6344, 285282321, 0, 6344, 285282321, 0, 6344, 285282321, 0, 6344, 285282321, 0, 6344, 285282321, 0, 6356, 285282321, 0, 6356, 285282321, 0, 6356, 285282321, 0, 6356, 285282321, 0, 6356, 285282321, 0, 6356, 285282321, 0, 6360, 285282321, 0, 6360, 285282321, 0, 6360, 285282321, 0, 6360, 285282321, 0, 6360, 285282321, 0, 6360, 285282321, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 
6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 7620, 805306675, 0, 7620, 805306675, 0, 7620, 805306675, 0, 7620, 805306675, 0, 7620, 805306675, 0, 7620, 805306675, 0, 7620, 805306675, 0, 7624, 805306675, 0, 7624, 805306675, 0, 7624, 805306675, 0, 7624, 805306675, 0, 7624, 805306675, 0, 7624, 805306675, 0, 7624, 805306675, 0, 7636, 805306675, 0, 7636, 805306675, 0, 7636, 805306675, 0, 7636, 805306675, 0, 7636, 805306675, 0, 7636, 805306675, 0, 7636, 805306675, 0, 7640, 805306675, 0, 7640, 805306675, 0, 7640, 805306675, 0, 7640, 805306675, 0, 7640, 805306675, 0, 7640, 805306675, 0, 7640, 805306675, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 
2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 
10836, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 
0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 768, 1, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1344, 268501008, 0, 2880, 1048832, 0, 2880, 1048832, 0, 3840, 33554432, 0, 3856, 33554432, 0, 5124, 838860819, 0, 5124, 838860819, 0, 5124, 838860819, 0, 5124, 838860819, 0, 5124, 838860819, 0, 5124, 838860819, 0, 5128, 838860819, 0, 5128, 838860819, 0, 5128, 838860819, 0, 5128, 838860819, 0, 5128, 838860819, 0, 5128, 838860819, 0, 5140, 838860819, 0, 5140, 838860819, 0, 5140, 838860819, 0, 5140, 838860819, 0, 5140, 838860819, 0, 5140, 838860819, 0, 5144, 838860819, 0, 5144, 838860819, 0, 5144, 838860819, 0, 5144, 838860819, 0, 5144, 838860819, 0, 5144, 838860819, 0, 5764, 1, 0, 5768, 1, 0, 5780, 1, 0, 5784, 1, 0, 6340, 285282321, 0, 6340, 285282321, 0, 6340, 285282321, 0, 6340, 285282321, 0, 6340, 285282321, 0, 6340, 285282321, 0, 6344, 285282321, 0, 6344, 285282321, 0, 6344, 285282321, 0, 6344, 285282321, 0, 6344, 285282321, 0, 6344, 285282321, 0, 6356, 285282321, 0, 6356, 285282321, 0, 6356, 285282321, 0, 6356, 285282321, 0, 6356, 285282321, 0, 6356, 285282321, 0, 6360, 285282321, 0, 6360, 285282321, 0, 6360, 285282321, 0, 6360, 285282321, 0, 6360, 285282321, 0, 6360, 285282321, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6660, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 
6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6664, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6676, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 6680, 858993459, 0, 7620, 805306675, 0, 7620, 805306675, 0, 7620, 805306675, 0, 7620, 805306675, 0, 7620, 805306675, 0, 7620, 805306675, 0, 7620, 805306675, 0, 7624, 805306675, 0, 7624, 805306675, 0, 7624, 805306675, 0, 7624, 805306675, 0, 7624, 805306675, 0, 7624, 805306675, 0, 7624, 805306675, 0, 7636, 805306675, 0, 7636, 805306675, 0, 7636, 805306675, 0, 7636, 805306675, 0, 7636, 805306675, 0, 7636, 805306675, 0, 7636, 805306675, 0, 7640, 805306675, 0, 7640, 805306675, 0, 7640, 805306675, 0, 7640, 805306675, 0, 7640, 805306675, 0, 7640, 805306675, 0, 7640, 805306675, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 8704, 2004318071, 0, 9792, 
2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9792, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 9808, 2863311530, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10820, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10824, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10836, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 
1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 10840, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12484, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12488, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12500, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 12504, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13056, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 
13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0, 13072, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756377578649780124_336_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756377578649780124_336_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..150fb7a0 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756377578649780124_336_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,143 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 29)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 312 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 4286578688, 0, 896, 4286578688, 0, 896, 4286578688, 0, 896, 4286578688, 0, 896, 4286578688, 0, 896, 4286578688, 0, 896, 4286578688, 0, 896, 4286578688, 0, 896, 4286578688, 0, 912, 4286578688, 0, 912, 4286578688, 0, 912, 4286578688, 0, 912, 4286578688, 0, 912, 4286578688, 0, 912, 4286578688, 0, 912, 4286578688, 0, 912, 4286578688, 0, 912, 4286578688, 0, 928, 4286578688, 0, 928, 4286578688, 0, 928, 4286578688, 0, 928, 4286578688, 0, 928, 4286578688, 0, 928, 4286578688, 0, 928, 4286578688, 0, 928, 4286578688, 0, 928, 4286578688, 0, 1344, 4227858432, 0, 1344, 4227858432, 0, 1344, 4227858432, 0, 1344, 4227858432, 0, 1344, 4227858432, 0, 1344, 4227858432, 0, 1360, 4227858432, 0, 1360, 4227858432, 0, 1360, 4227858432, 0, 1360, 4227858432, 0, 1360, 4227858432, 0, 1360, 4227858432, 0, 1376, 4227858432, 0, 1376, 4227858432, 0, 1376, 4227858432, 0, 1376, 4227858432, 0, 1376, 4227858432, 0, 1376, 4227858432, 0, 3152, 2684354560, 0, 3152, 2684354560, 0, 4304, 3221225472, 0, 4304, 3221225472, 0, 5952, 3758096384, 0, 5952, 3758096384, 0, 5952, 3758096384, 0, 896, 4286578688, 0, 896, 4286578688, 0, 896, 4286578688, 0, 896, 4286578688, 0, 896, 4286578688, 0, 896, 4286578688, 0, 896, 4286578688, 0, 896, 4286578688, 0, 896, 4286578688, 0, 912, 4286578688, 0, 
912, 4286578688, 0, 912, 4286578688, 0, 912, 4286578688, 0, 912, 4286578688, 0, 912, 4286578688, 0, 912, 4286578688, 0, 912, 4286578688, 0, 912, 4286578688, 0, 928, 4286578688, 0, 928, 4286578688, 0, 928, 4286578688, 0, 928, 4286578688, 0, 928, 4286578688, 0, 928, 4286578688, 0, 928, 4286578688, 0, 928, 4286578688, 0, 928, 4286578688, 0, 1344, 4227858432, 0, 1344, 4227858432, 0, 1344, 4227858432, 0, 1344, 4227858432, 0, 1344, 4227858432, 0, 1344, 4227858432, 0, 1360, 4227858432, 0, 1360, 4227858432, 0, 1360, 4227858432, 0, 1360, 4227858432, 0, 1360, 4227858432, 0, 1360, 4227858432, 0, 1376, 4227858432, 0, 1376, 4227858432, 0, 1376, 4227858432, 0, 1376, 4227858432, 0, 1376, 4227858432, 0, 1376, 4227858432, 0, 3152, 2684354560, 0, 3152, 2684354560, 0, 4304, 3221225472, 0, 4304, 3221225472, 0, 5952, 3758096384, 0, 5952, 3758096384, 0, 5952, 3758096384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756377979193782493_342_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756377979193782493_342_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9673cd75 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756377979193782493_342_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,233 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 26)) { + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) 
|| (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 2))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 17))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((218 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 2818572288, 0, 1216, 2818572288, 0, 1216, 2818572288, 0, 5888, 73, 0, 5888, 73, 0, 5888, 73, 0, 8320, 16, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 14400, 4, 0, 14416, 4, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 2818572288, 0, 1216, 2818572288, 0, 1216, 2818572288, 0, 5888, 73, 0, 5888, 73, 0, 5888, 73, 0, 8320, 16, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 12160, 613566756, 0, 14400, 4, 0, 14416, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756377980908080956_343_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756377980908080956_343_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..138c36c3 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756377980908080956_343_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,114 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 11)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756377981099217037_344_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756377981099217037_344_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f86bec85 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756377981099217037_344_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,160 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 27))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter1 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 26))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 18))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((131 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
(((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 372 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1744, 134217728, 0, 1760, 134217728, 0, 3856, 537010240, 0, 3856, 537010240, 0, 3856, 537010240, 0, 3856, 537010240, 0, 3872, 537010240, 0, 3872, 537010240, 0, 3872, 537010240, 0, 3872, 537010240, 0, 3888, 537010240, 0, 3888, 537010240, 0, 3888, 537010240, 0, 3888, 537010240, 0, 5008, 3758096447, 0, 5008, 3758096447, 0, 5008, 3758096447, 0, 5008, 3758096447, 0, 5008, 3758096447, 0, 5008, 3758096447, 0, 5008, 3758096447, 0, 5008, 3758096447, 0, 5008, 3758096447, 0, 5024, 3758096447, 0, 5024, 3758096447, 0, 5024, 3758096447, 0, 5024, 3758096447, 0, 5024, 3758096447, 0, 5024, 3758096447, 0, 5024, 3758096447, 0, 5024, 3758096447, 0, 5024, 3758096447, 0, 5040, 3758096447, 0, 5040, 3758096447, 0, 5040, 3758096447, 0, 5040, 3758096447, 0, 5040, 3758096447, 0, 5040, 3758096447, 0, 5040, 3758096447, 0, 5040, 3758096447, 0, 5040, 3758096447, 0, 10384, 2147483711, 0, 10384, 2147483711, 0, 10384, 2147483711, 0, 10384, 2147483711, 0, 10384, 2147483711, 
0, 10384, 2147483711, 0, 10384, 2147483711, 0, 10400, 2147483711, 0, 10400, 2147483711, 0, 10400, 2147483711, 0, 10400, 2147483711, 0, 10400, 2147483711, 0, 10400, 2147483711, 0, 10400, 2147483711, 0, 10416, 2147483711, 0, 10416, 2147483711, 0, 10416, 2147483711, 0, 10416, 2147483711, 0, 10416, 2147483711, 0, 10416, 2147483711, 0, 10416, 2147483711, 0, 1744, 134217728, 0, 1760, 134217728, 0, 3856, 537010240, 0, 3856, 537010240, 0, 3856, 537010240, 0, 3856, 537010240, 0, 3872, 537010240, 0, 3872, 537010240, 0, 3872, 537010240, 0, 3872, 537010240, 0, 3888, 537010240, 0, 3888, 537010240, 0, 3888, 537010240, 0, 3888, 537010240, 0, 5008, 3758096447, 0, 5008, 3758096447, 0, 5008, 3758096447, 0, 5008, 3758096447, 0, 5008, 3758096447, 0, 5008, 3758096447, 0, 5008, 3758096447, 0, 5008, 3758096447, 0, 5008, 3758096447, 0, 5024, 3758096447, 0, 5024, 3758096447, 0, 5024, 3758096447, 0, 5024, 3758096447, 0, 5024, 3758096447, 0, 5024, 3758096447, 0, 5024, 3758096447, 0, 5024, 3758096447, 0, 5024, 3758096447, 0, 5040, 3758096447, 0, 5040, 3758096447, 0, 5040, 3758096447, 0, 5040, 3758096447, 0, 5040, 3758096447, 0, 5040, 3758096447, 0, 5040, 3758096447, 0, 5040, 3758096447, 0, 5040, 3758096447, 0, 10384, 2147483711, 0, 10384, 2147483711, 0, 10384, 2147483711, 0, 10384, 2147483711, 0, 10384, 2147483711, 0, 10384, 2147483711, 0, 10384, 2147483711, 0, 10400, 2147483711, 0, 10400, 2147483711, 0, 10400, 2147483711, 0, 10400, 2147483711, 0, 10400, 2147483711, 0, 10400, 2147483711, 0, 10400, 2147483711, 0, 10416, 2147483711, 0, 10416, 2147483711, 0, 10416, 2147483711, 0, 10416, 2147483711, 0, 10416, 2147483711, 0, 10416, 2147483711, 0, 10416, 2147483711, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 
0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756377993419930262_345_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756377993419930262_345_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..11a66831 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756377993419930262_345_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,208 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() >= 23)) { + if 
((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1344, 268501008, 0, 4480, 4, 0, 4484, 4, 0, 4496, 4, 0, 4500, 4, 0, 4512, 4, 0, 4516, 4, 0, 7504, 2290089984, 0, 7504, 2290089984, 0, 7504, 2290089984, 0, 
7520, 2290089984, 0, 7520, 2290089984, 0, 7520, 2290089984, 0, 8208, 2147483648, 0, 8224, 2147483648, 0, 768, 1, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1344, 268501008, 0, 4480, 4, 0, 4484, 4, 0, 4496, 4, 0, 4500, 4, 0, 4512, 4, 0, 4516, 4, 0, 7504, 2290089984, 0, 7504, 2290089984, 0, 7504, 2290089984, 0, 7520, 2290089984, 0, 7520, 2290089984, 0, 7520, 2290089984, 0, 8208, 2147483648, 0, 8224, 2147483648, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756377996474394826_346_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756377996474394826_346_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e371e895 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756377996474394826_346_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,247 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 20)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 1984, 268501008, 0, 1984, 268501008, 0, 1984, 268501008, 0, 2624, 1048832, 0, 2624, 1048832, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3968, 559240, 0, 3968, 559240, 0, 3968, 559240, 0, 3968, 559240, 0, 3968, 559240, 0, 4864, 73, 0, 4864, 73, 0, 4864, 73, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5440, 272696336, 0, 768, 1, 0, 1984, 268501008, 0, 1984, 268501008, 0, 1984, 268501008, 0, 2624, 1048832, 0, 2624, 1048832, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3968, 559240, 0, 3968, 
559240, 0, 3968, 559240, 0, 3968, 559240, 0, 3968, 559240, 0, 4864, 73, 0, 4864, 73, 0, 4864, 73, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5440, 272696336, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756378052991384419_349_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756378052991384419_349_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a79bee34 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756378052991384419_349_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,163 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() < 9)) { + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2176, 4, 0, 3520, 73, 0, 3520, 73, 0, 3520, 73, 0, 4096, 272696336, 0, 4096, 272696336, 0, 4096, 272696336, 0, 4096, 272696336, 0, 4096, 272696336, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 2176, 4, 0, 3520, 73, 0, 3520, 73, 0, 3520, 73, 0, 4096, 272696336, 0, 4096, 272696336, 0, 4096, 272696336, 0, 4096, 272696336, 0, 4096, 272696336, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0, 4416, 3067833782, 0] + - Name: _wave_op_index + Format: UInt32 + 
Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756378204749219940_353_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756378204749219940_353_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..30817558 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756378204749219940_353_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,307 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 19)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 16)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (294 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [15680, 4026531841, 0, 15680, 4026531841, 0, 15680, 4026531841, 0, 15680, 4026531841, 0, 15680, 4026531841, 0, 18816, 536870912, 0, 15680, 4026531841, 0, 15680, 
4026531841, 0, 15680, 4026531841, 0, 15680, 4026531841, 0, 15680, 4026531841, 0, 18816, 536870912, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756378374167551417_355_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756378374167551417_355_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bc75ef84 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756378374167551417_355_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,165 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 23))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 1, 0, 2816, 1140850692, 0, 2816, 1140850692, 0, 2816, 1140850692, 0, 2832, 1140850692, 0, 2832, 1140850692, 0, 2832, 1140850692, 0, 2848, 1140850692, 0, 2848, 1140850692, 0, 2848, 1140850692, 0, 4608, 1073741828, 0, 4608, 1073741828, 0, 4624, 1073741828, 0, 4624, 1073741828, 0, 4640, 1073741828, 0, 4640, 1073741828, 0, 7104, 3221225479, 0, 7104, 3221225479, 0, 7104, 3221225479, 0, 7104, 3221225479, 0, 7104, 3221225479, 0, 1024, 1, 0, 2816, 1140850692, 0, 2816, 1140850692, 0, 2816, 1140850692, 0, 2832, 1140850692, 0, 2832, 1140850692, 0, 2832, 1140850692, 0, 2848, 1140850692, 0, 2848, 1140850692, 0, 2848, 1140850692, 0, 4608, 1073741828, 0, 4608, 1073741828, 0, 4624, 1073741828, 0, 4624, 1073741828, 0, 4640, 1073741828, 0, 4640, 1073741828, 0, 7104, 3221225479, 0, 7104, 3221225479, 0, 7104, 3221225479, 0, 7104, 3221225479, 0, 7104, 3221225479, 0] + - 
Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756378374650691037_356_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756378374650691037_356_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8699a36e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756378374650691037_356_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,317 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((i1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { 
+ if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 20))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 
28)) || (WaveGetLaneIndex() == 23))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((240 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 228 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5696, 65, 0, 5696, 65, 0, 12736, 68174084, 0, 12736, 68174084, 0, 12736, 68174084, 0, 12736, 68174084, 0, 12736, 68174084, 0, 15360, 268435473, 0, 15360, 268435473, 0, 15360, 268435473, 0, 15364, 268435473, 0, 15364, 268435473, 0, 15364, 268435473, 0, 15376, 268435473, 0, 15376, 268435473, 0, 15376, 268435473, 0, 15380, 268435473, 0, 15380, 268435473, 0, 15380, 268435473, 0, 15392, 268435473, 0, 15392, 268435473, 0, 15392, 268435473, 0, 15396, 268435473, 0, 15396, 268435473, 0, 15396, 268435473, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 
1145324612, 0, 16256, 1145324612, 0, 16704, 559240, 0, 16704, 559240, 0, 16704, 559240, 0, 16704, 559240, 0, 16704, 559240, 0, 5696, 65, 0, 5696, 65, 0, 12736, 68174084, 0, 12736, 68174084, 0, 12736, 68174084, 0, 12736, 68174084, 0, 12736, 68174084, 0, 15360, 268435473, 0, 15360, 268435473, 0, 15360, 268435473, 0, 15364, 268435473, 0, 15364, 268435473, 0, 15364, 268435473, 0, 15376, 268435473, 0, 15376, 268435473, 0, 15376, 268435473, 0, 15380, 268435473, 0, 15380, 268435473, 0, 15380, 268435473, 0, 15392, 268435473, 0, 15392, 268435473, 0, 15392, 268435473, 0, 15396, 268435473, 0, 15396, 268435473, 0, 15396, 268435473, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16704, 559240, 0, 16704, 559240, 0, 16704, 559240, 0, 16704, 559240, 0, 16704, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756378389475916530_358_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756378389475916530_358_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4309f6fa --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756378389475916530_358_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,242 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 28))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 
6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + break; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 23))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5824, 268439552, 0, 5824, 268439552, 0, 7488, 67108864, 0, 9664, 136348160, 0, 9664, 136348160, 0, 9664, 136348160, 0, 9664, 136348160, 0, 9680, 136348160, 0, 9680, 136348160, 0, 9680, 136348160, 0, 9680, 136348160, 0, 15168, 76695844, 0, 15168, 76695844, 0, 15168, 76695844, 0, 15168, 76695844, 0, 15168, 76695844, 0, 15168, 76695844, 0, 15168, 76695844, 0, 15168, 76695844, 0, 15168, 76695844, 0, 5824, 268439552, 0, 5824, 268439552, 0, 7488, 67108864, 0, 9664, 136348160, 0, 9664, 136348160, 0, 9664, 136348160, 0, 9664, 136348160, 0, 9680, 136348160, 0, 9680, 136348160, 0, 9680, 136348160, 0, 9680, 136348160, 0, 15168, 76695844, 0, 15168, 
76695844, 0, 15168, 76695844, 0, 15168, 76695844, 0, 15168, 76695844, 0, 15168, 76695844, 0, 15168, 76695844, 0, 15168, 76695844, 0, 15168, 76695844, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756378391913632083_359_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756378391913632083_359_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..337f6ce6 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756378391913632083_359_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,454 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 25))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 19)) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 14)) { + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 25)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | 
(counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 22))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || 
(WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 23))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 17))) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (307 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((324 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((333 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (337 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (354 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((372 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 1)) { + continue; + } + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((398 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((418 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 22))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((440 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((451 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((458 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((469 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 2)) { + break; + } + } + if ((i4 == 2)) { + break; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 24))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (497 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (501 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 528 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7616, 1048833, 0, 7616, 1048833, 0, 7616, 1048833, 0, 8832, 3, 0, 8832, 3, 0, 10496, 33620224, 0, 10496, 33620224, 0, 10496, 33620224, 0, 11648, 2147483648, 0, 12560, 2147483648, 0, 12576, 2147483648, 0, 12592, 2147483648, 0, 13136, 4286578688, 0, 13136, 4286578688, 0, 13136, 4286578688, 0, 13136, 4286578688, 0, 13136, 4286578688, 0, 13136, 4286578688, 0, 13136, 4286578688, 0, 13136, 4286578688, 0, 13136, 4286578688, 0, 13152, 4286578688, 0, 13152, 4286578688, 0, 13152, 4286578688, 0, 13152, 4286578688, 0, 13152, 4286578688, 0, 13152, 4286578688, 0, 13152, 4286578688, 0, 13152, 4286578688, 0, 13152, 4286578688, 0, 13168, 4286578688, 0, 13168, 4286578688, 0, 13168, 4286578688, 0, 13168, 4286578688, 0, 13168, 4286578688, 0, 
13168, 4286578688, 0, 13168, 4286578688, 0, 13168, 4286578688, 0, 13168, 4286578688, 0, 13824, 3221225472, 0, 13824, 3221225472, 0, 14464, 17, 0, 14464, 17, 0, 25472, 1024, 0, 25488, 1024, 0, 25504, 1024, 0, 26756, 1145044992, 0, 26756, 1145044992, 0, 26756, 1145044992, 0, 26760, 1145044992, 0, 26760, 1145044992, 0, 26760, 1145044992, 0, 26772, 1145044992, 0, 26772, 1145044992, 0, 26772, 1145044992, 0, 26776, 1145044992, 0, 26776, 1145044992, 0, 26776, 1145044992, 0, 26788, 1145044992, 0, 26788, 1145044992, 0, 26788, 1145044992, 0, 26792, 1145044992, 0, 26792, 1145044992, 0, 26792, 1145044992, 0, 30020, 1140850756, 0, 30020, 1140850756, 0, 30020, 1140850756, 0, 30020, 1140850756, 0, 30024, 1140850756, 0, 30024, 1140850756, 0, 30024, 1140850756, 0, 30024, 1140850756, 0, 30036, 1140850756, 0, 30036, 1140850756, 0, 30036, 1140850756, 0, 30036, 1140850756, 0, 30040, 1140850756, 0, 30040, 1140850756, 0, 30040, 1140850756, 0, 30040, 1140850756, 0, 30052, 1140850756, 0, 30052, 1140850756, 0, 30052, 1140850756, 0, 30052, 1140850756, 0, 30056, 1140850756, 0, 30056, 1140850756, 0, 30056, 1140850756, 0, 30056, 1140850756, 0, 7616, 1048833, 0, 7616, 1048833, 0, 7616, 1048833, 0, 8832, 3, 0, 8832, 3, 0, 10496, 33620224, 0, 10496, 33620224, 0, 10496, 33620224, 0, 11648, 2147483648, 0, 12560, 2147483648, 0, 12576, 2147483648, 0, 12592, 2147483648, 0, 13136, 4286578688, 0, 13136, 4286578688, 0, 13136, 4286578688, 0, 13136, 4286578688, 0, 13136, 4286578688, 0, 13136, 4286578688, 0, 13136, 4286578688, 0, 13136, 4286578688, 0, 13136, 4286578688, 0, 13152, 4286578688, 0, 13152, 4286578688, 0, 13152, 4286578688, 0, 13152, 4286578688, 0, 13152, 4286578688, 0, 13152, 4286578688, 0, 13152, 4286578688, 0, 13152, 4286578688, 0, 13152, 4286578688, 0, 13168, 4286578688, 0, 13168, 4286578688, 0, 13168, 4286578688, 0, 13168, 4286578688, 0, 13168, 4286578688, 0, 13168, 4286578688, 0, 13168, 4286578688, 0, 13168, 4286578688, 0, 13168, 4286578688, 0, 13824, 3221225472, 0, 13824, 3221225472, 0, 
14464, 17, 0, 14464, 17, 0, 25472, 1024, 0, 25488, 1024, 0, 25504, 1024, 0, 26756, 1145044992, 0, 26756, 1145044992, 0, 26756, 1145044992, 0, 26760, 1145044992, 0, 26760, 1145044992, 0, 26760, 1145044992, 0, 26772, 1145044992, 0, 26772, 1145044992, 0, 26772, 1145044992, 0, 26776, 1145044992, 0, 26776, 1145044992, 0, 26776, 1145044992, 0, 26788, 1145044992, 0, 26788, 1145044992, 0, 26788, 1145044992, 0, 26792, 1145044992, 0, 26792, 1145044992, 0, 26792, 1145044992, 0, 30020, 1140850756, 0, 30020, 1140850756, 0, 30020, 1140850756, 0, 30020, 1140850756, 0, 30024, 1140850756, 0, 30024, 1140850756, 0, 30024, 1140850756, 0, 30024, 1140850756, 0, 30036, 1140850756, 0, 30036, 1140850756, 0, 30036, 1140850756, 0, 30036, 1140850756, 0, 30040, 1140850756, 0, 30040, 1140850756, 0, 30040, 1140850756, 0, 30040, 1140850756, 0, 30052, 1140850756, 0, 30052, 1140850756, 0, 30052, 1140850756, 0, 30052, 1140850756, 0, 30056, 1140850756, 0, 30056, 1140850756, 0, 30056, 1140850756, 0, 30056, 1140850756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756378658710633763_361_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756378658710633763_361_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c94f8a1b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756378658710633763_361_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,239 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Each guarded site below runs one wave intrinsic and then appends a 3-word record (site tag, ballot.x, ballot.y) to _participant_bit. tid is never read; every predicate uses WaveGetLaneIndex(). */ + uint result = 0; /* only feeds the wave intrinsics; never stored to a buffer in this function */ + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* reserve 3 consecutive slots; temp receives the counter's previous value and serves as the record's base index */ + _participant_bit[temp] = (17 << 6); /* word 0: site tag in bits 6 and up; the low 6 bits stay free for the loop counters other sites OR in */ + uint4 ballot = WaveActiveBallot(1); /* constant-true argument, so the mask marks the lanes executing this site */ + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { /* i0 is OR-ed into bits 4-5 of the tags inside this loop */ + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { /* i1 is OR-ed into bits 2-3 of the tags inside this loop */ + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 22))) { /* lanes 3, 14, 23, 28 and 22 all fail the outermost guard (lane < 2 || lane >= 31), so the two nested sites (tags 86 and 109) are never reached */ + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((120 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { /* cases 0, 1 and 2 cover every remainder; there is no default label */ + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { /* i2 is OR-ed into bits 4-5 of the tags inside this loop */ + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 23))) { + result = (result +
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 22))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (i2 << 4)); + uint4 ballot =
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { /* i3 is OR-ed into bits 4-5 of the tags inside this loop */ + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((237 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((305 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1,
1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 462 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 2147483651, 0, 1088, 2147483651, 0, 1088, 2147483651, 0, 2176, 1, 0, 2192, 1, 0, 2208, 1, 0, 7680, 2147483651, 0, 7680, 2147483651, 0, 7680, 2147483651, 0, 7684, 2147483651, 0, 7684, 2147483651, 0, 7684, 2147483651, 0, 7688, 2147483651, 0, 7688, 2147483651, 0, 7688, 2147483651, 0, 7696, 2147483651, 0, 7696, 2147483651, 0, 7696, 2147483651, 0, 7700, 2147483651, 0, 7700, 2147483651, 0, 7700, 2147483651, 0, 7704, 2147483651, 0, 7704, 2147483651, 0, 7704, 2147483651, 0, 7712, 2147483651, 0, 7712, 2147483651, 0, 7712, 2147483651, 0, 7716, 2147483651, 0, 7716, 2147483651, 0, 7716, 2147483651, 0, 7720, 2147483651, 0, 7720, 2147483651, 0, 7720, 2147483651, 0, 8384, 2147483651, 0, 8384, 2147483651, 0, 8384, 2147483651, 0, 9024, 73, 0, 9024, 73, 0, 9024, 73, 0, 10496, 65536, 0, 10512, 65536, 0, 11648, 2147483666, 0, 11648, 2147483666, 0, 11648, 2147483666, 0, 11664, 2147483666, 0, 11664, 2147483666, 0, 11664, 2147483666, 0, 12352, 2415919122, 0, 12352, 2415919122, 0, 12352, 2415919122, 0, 12352, 2415919122, 0, 12368, 2415919122, 0, 12368, 2415919122, 0, 12368, 2415919122, 0, 12368, 2415919122, 0, 14208, 2415919104, 0, 14208, 2415919104, 0, 14224, 2415919104, 0, 14224, 2415919104, 0, 15168, 613416960, 0, 15168, 613416960, 0, 15168, 613416960, 0, 15168, 613416960, 0, 15184, 613416960, 0, 15184, 613416960, 0, 15184, 613416960, 0, 15184, 613416960, 0, 15200, 613416960, 0, 15200, 613416960, 0, 15200, 613416960, 0, 15200, 613416960, 0, 19520, 67110912, 0, 19520, 67110912, 0, 19536, 67110912, 0, 19536, 67110912, 0, 19552, 67110912, 0, 19552, 67110912, 0, 1088, 2147483651, 0, 1088, 2147483651, 0, 1088, 2147483651, 0, 2176, 1, 0, 2192, 1, 0, 2208, 1, 0, 7680, 2147483651, 0, 7680, 2147483651, 0, 7680, 2147483651, 0, 7684, 2147483651, 0, 7684, 2147483651, 0, 7684, 2147483651, 0, 7688, 
2147483651, 0, 7688, 2147483651, 0, 7688, 2147483651, 0, 7696, 2147483651, 0, 7696, 2147483651, 0, 7696, 2147483651, 0, 7700, 2147483651, 0, 7700, 2147483651, 0, 7700, 2147483651, 0, 7704, 2147483651, 0, 7704, 2147483651, 0, 7704, 2147483651, 0, 7712, 2147483651, 0, 7712, 2147483651, 0, 7712, 2147483651, 0, 7716, 2147483651, 0, 7716, 2147483651, 0, 7716, 2147483651, 0, 7720, 2147483651, 0, 7720, 2147483651, 0, 7720, 2147483651, 0, 8384, 2147483651, 0, 8384, 2147483651, 0, 8384, 2147483651, 0, 9024, 73, 0, 9024, 73, 0, 9024, 73, 0, 10496, 65536, 0, 10512, 65536, 0, 11648, 2147483666, 0, 11648, 2147483666, 0, 11648, 2147483666, 0, 11664, 2147483666, 0, 11664, 2147483666, 0, 11664, 2147483666, 0, 12352, 2415919122, 0, 12352, 2415919122, 0, 12352, 2415919122, 0, 12352, 2415919122, 0, 12368, 2415919122, 0, 12368, 2415919122, 0, 12368, 2415919122, 0, 12368, 2415919122, 0, 14208, 2415919104, 0, 14208, 2415919104, 0, 14224, 2415919104, 0, 14224, 2415919104, 0, 15168, 613416960, 0, 15168, 613416960, 0, 15168, 613416960, 0, 15168, 613416960, 0, 15184, 613416960, 0, 15184, 613416960, 0, 15184, 613416960, 0, 15184, 613416960, 0, 15200, 613416960, 0, 15200, 613416960, 0, 15200, 613416960, 0, 15200, 613416960, 0, 19520, 67110912, 0, 19520, 67110912, 0, 19536, 67110912, 0, 19536, 67110912, 0, 19552, 67110912, 0, 19552, 67110912, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756378691919405714_363_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756378691919405714_363_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ee62c2bc --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756378691919405714_363_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,131 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Each guarded site below runs one wave intrinsic and then appends a 3-word record (site tag, ballot.x, ballot.y) to _participant_bit. tid is never read; every predicate uses WaveGetLaneIndex(). */ + uint result = 0; /* only feeds the wave intrinsics; never stored to a buffer in this function */ + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { /* i0 is OR-ed into bits 4-5 of every tag in this function */ + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* reserve 3 consecutive slots; temp receives the counter's previous value and serves as the record's base index */ + _participant_bit[temp] = ((16 << 6) | (i0 << 4)); /* word 0: site tag in bits 6 and up, loop counters in the low 6 bits */ + uint4 ballot = WaveActiveBallot(1); /* constant-true argument, so the mask marks the lanes executing this site */ + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 7))) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { /* i1 is OR-ed into bits 2-3 of the tags inside this loop */ + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); /* incremented before use, so the site below would see 1 and then 2 */ + if ((WaveGetLaneIndex() == 22)) { /* lane 22 is not among the lanes admitted by the enclosing guard (5, 16, 20, 7), so tag 62 is never recorded */ + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((62 << 6) | (i0 << 4)) | (i1 << 2)) | counter2); + uint4 ballot =
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 672 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0,
1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 3072, 1114112, 0, 3072, 1114112, 0, 3076, 1114112, 0, 3076, 1114112, 0, 3088, 1114112, 0, 3088, 1114112, 0, 3092, 1114112, 0, 3092, 1114112, 0, 4544, 160, 0, 4544, 160, 0, 4548, 160, 0, 4548, 160, 0, 4560, 160, 0, 4560, 160, 0, 4564, 160, 0, 4564, 160, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5440, 3758096384, 0, 5440, 3758096384, 0, 5440, 3758096384, 0, 5456, 3758096384, 0, 5456, 3758096384, 0, 5456, 3758096384, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 
6032, 2863311530, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 1040, 1431655765, 0, 3072, 1114112, 0, 3072, 1114112, 0, 3076, 1114112, 0, 3076, 1114112, 0, 3088, 1114112, 0, 3088, 1114112, 0, 3092, 1114112, 0, 3092, 1114112, 0, 4544, 160, 0, 4544, 160, 0, 4548, 160, 0, 4548, 160, 0, 4560, 160, 0, 4560, 160, 0, 4564, 160, 0, 4564, 160, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 4992, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5008, 4293656576, 0, 5440, 3758096384, 0, 5440, 3758096384, 0, 5440, 3758096384, 0, 5456, 3758096384, 0, 5456, 3758096384, 0, 5456, 3758096384, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6016, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 
6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0, 6032, 2863311530, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756378696758040538_364_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756378696758040538_364_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4a65566c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756378696758040538_364_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,197 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Each guarded site below runs one wave intrinsic and then appends a 3-word record (site tag, ballot.x, ballot.y) to _participant_bit. tid is never read; every predicate uses WaveGetLaneIndex(). */ + uint result = 0; /* only feeds the wave intrinsics; never stored to a buffer in this function */ + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 30))) { + switch ((WaveGetLaneIndex() % 4)) { /* cases 0 to 3 cover every remainder; this switch has no default label */ + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* reserve 3 consecutive slots; temp receives the counter's previous value and serves as the record's base index */ + _participant_bit[temp] = (16 << 6); /* word 0: site tag in bits 6 and up; the low 6 bits stay free for the loop counters other sites OR in */ + uint4 ballot = WaveActiveBallot(1); /* constant-true argument, so the mask marks the lanes executing this site */ + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } +
break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { /* case 1 lanes have odd indices, so this even-lane test cannot pass */ + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 24))) { + uint counter0 = 0; + while ((counter0 < 3)) { /* case 3 lanes have odd indices, so neither even-lane site in this loop can fire */ + counter0 = (counter0 + 1); /* incremented before use; OR-ed into bits 4-5 of the tags below */ + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if
(((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { /* the remainder is always 0 or 1, so the default label below is unreachable */ + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 25))) { /* runs after the switch for every lane admitted by the outermost guard */ + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); +
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 17, 0, 1024, 17, 0, 1920, 1073741892, 0, 1920, 1073741892, 0, 1920, 1073741892, 0, 10048, 3221225599, 0, 10048, 3221225599, 0, 10048, 3221225599, 0, 10048, 3221225599, 0, 10048, 3221225599, 0, 10048, 3221225599, 0, 10048, 3221225599, 0, 10048, 3221225599, 0, 10048, 3221225599, 0, 1024, 17, 0, 1024, 17, 0, 1920, 1073741892, 0, 1920, 1073741892, 0, 1920, 1073741892, 0, 10048, 3221225599, 0, 10048, 3221225599, 0, 10048, 3221225599, 0, 10048, 3221225599, 0, 10048, 3221225599, 0, 10048, 3221225599, 0, 10048, 3221225599, 0, 10048, 3221225599, 0, 10048, 3221225599, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756378704341205175_366_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756378704341205175_366_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1cbb5e7b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756378704341205175_366_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,163 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 268435473, 0, 1344, 268435473, 0, 1344, 268435473, 0, 1360, 268435473, 0, 1360, 268435473, 0, 1360, 268435473, 0, 2048, 285212689, 0, 2048, 285212689, 0, 2048, 285212689, 0, 2048, 285212689, 0, 2064, 285212689, 0, 2064, 285212689, 0, 2064, 285212689, 0, 2064, 285212689, 0, 5120, 1145324612, 0, 5120, 1145324612, 0, 5120, 1145324612, 0, 5120, 1145324612, 0, 5120, 1145324612, 0, 5120, 1145324612, 0, 5120, 1145324612, 0, 5120, 1145324612, 0, 5568, 559240, 0, 5568, 559240, 0, 5568, 559240, 0, 5568, 559240, 0, 5568, 559240, 0, 1344, 268435473, 0, 1344, 268435473, 0, 1344, 268435473, 0, 1360, 268435473, 0, 1360, 268435473, 0, 1360, 268435473, 0, 2048, 285212689, 0, 2048, 285212689, 0, 2048, 285212689, 0, 2048, 285212689, 0, 2064, 285212689, 0, 2064, 285212689, 0, 2064, 285212689, 0, 2064, 285212689, 0, 5120, 1145324612, 0, 5120, 1145324612, 0, 5120, 1145324612, 0, 5120, 1145324612, 0, 5120, 1145324612, 0, 5120, 1145324612, 0, 5120, 1145324612, 0, 5120, 1145324612, 0, 5568, 559240, 0, 5568, 559240, 0, 5568, 559240, 0, 5568, 559240, 0, 5568, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756378880391475472_369_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756378880391475472_369_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..050dca4e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756378880391475472_369_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,153 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 23)) { + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if 
(((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((85 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 354 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3408, 1140850756, 0, 3408, 1140850756, 0, 3408, 1140850756, 0, 3408, 1140850756, 0, 3424, 1140850756, 0, 3424, 1140850756, 0, 3424, 1140850756, 0, 3424, 1140850756, 0, 4624, 1140850692, 0, 4624, 1140850692, 0, 4624, 1140850692, 0, 4628, 1140850692, 0, 4628, 1140850692, 0, 4628, 1140850692, 0, 4632, 1140850692, 0, 4632, 1140850692, 0, 4632, 1140850692, 0, 4640, 1140850692, 0, 4640, 1140850692, 0, 4640, 1140850692, 0, 4644, 1140850692, 0, 4644, 1140850692, 0, 4644, 1140850692, 0, 4648, 1140850692, 0, 4648, 1140850692, 0, 4648, 1140850692, 0, 5456, 1140850756, 0, 5456, 1140850756, 0, 5456, 1140850756, 0, 5456, 1140850756, 0, 5460, 1140850756, 0, 5460, 1140850756, 0, 5460, 1140850756, 0, 5460, 1140850756, 0, 5464, 1140850756, 0, 5464, 1140850756, 0, 5464, 1140850756, 0, 5464, 1140850756, 0, 5472, 1140850756, 0, 5472, 1140850756, 0, 5472, 1140850756, 0, 5472, 1140850756, 0, 5476, 1140850756, 0, 5476, 1140850756, 0, 5476, 1140850756, 0, 5476, 1140850756, 0, 5480, 1140850756, 0, 5480, 1140850756, 0, 5480, 1140850756, 0, 5480, 1140850756, 0, 6160, 1073741828, 0, 6160, 1073741828, 0, 6176, 1073741828, 0, 6176, 1073741828, 0, 6592, 559240, 0, 6592, 559240, 0, 6592, 559240, 0, 6592, 559240, 0, 6592, 559240, 0, 3408, 1140850756, 0, 3408, 1140850756, 0, 3408, 1140850756, 0, 3408, 1140850756, 0, 3424, 1140850756, 0, 3424, 1140850756, 0, 3424, 1140850756, 0, 3424, 1140850756, 0, 4624, 1140850692, 0, 4624, 1140850692, 0, 4624, 1140850692, 0, 4628, 1140850692, 0, 4628, 1140850692, 0, 4628, 1140850692, 0, 4632, 1140850692, 0, 4632, 1140850692, 0, 4632, 1140850692, 0, 4640, 1140850692, 0, 4640, 1140850692, 0, 4640, 1140850692, 0, 4644, 1140850692, 0, 4644, 1140850692, 0, 4644, 1140850692, 0, 4648, 1140850692, 0, 4648, 1140850692, 0, 4648, 1140850692, 0, 5456, 1140850756, 
0, 5456, 1140850756, 0, 5456, 1140850756, 0, 5456, 1140850756, 0, 5460, 1140850756, 0, 5460, 1140850756, 0, 5460, 1140850756, 0, 5460, 1140850756, 0, 5464, 1140850756, 0, 5464, 1140850756, 0, 5464, 1140850756, 0, 5464, 1140850756, 0, 5472, 1140850756, 0, 5472, 1140850756, 0, 5472, 1140850756, 0, 5472, 1140850756, 0, 5476, 1140850756, 0, 5476, 1140850756, 0, 5476, 1140850756, 0, 5476, 1140850756, 0, 5480, 1140850756, 0, 5480, 1140850756, 0, 5480, 1140850756, 0, 5480, 1140850756, 0, 6160, 1073741828, 0, 6160, 1073741828, 0, 6176, 1073741828, 0, 6176, 1073741828, 0, 6592, 559240, 0, 6592, 559240, 0, 6592, 559240, 0, 6592, 559240, 0, 6592, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756378881961963209_370_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756378881961963209_370_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..29894b0a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756378881961963209_370_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,277 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27))) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 25)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((138 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((147 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
case 1: { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 31))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 306 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3456, 134348808, 0, 3456, 134348808, 0, 3456, 134348808, 0, 3472, 134348808, 0, 3472, 134348808, 0, 3472, 134348808, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 13248, 73, 0, 13248, 73, 0, 13248, 73, 0, 16576, 2147483712, 0, 16576, 2147483712, 0, 19200, 2147483648, 0, 19520, 613566756, 0, 19520, 613566756, 0, 19520, 613566756, 0, 19520, 613566756, 0, 19520, 613566756, 0, 19520, 613566756, 0, 19520, 613566756, 0, 19520, 613566756, 0, 19520, 613566756, 0, 19520, 613566756, 0, 3456, 134348808, 0, 3456, 134348808, 0, 3456, 134348808, 0, 3472, 134348808, 0, 3472, 134348808, 0, 3472, 134348808, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 
1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 5184, 1431655765, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 12608, 2728962722, 0, 13248, 73, 0, 13248, 73, 0, 13248, 73, 0, 16576, 2147483712, 0, 16576, 2147483712, 0, 19200, 2147483648, 0, 19520, 613566756, 0, 19520, 613566756, 0, 19520, 613566756, 0, 19520, 613566756, 0, 19520, 613566756, 0, 19520, 613566756, 0, 19520, 613566756, 0, 19520, 613566756, 0, 19520, 613566756, 0, 19520, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756379033704295386_375_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756379033704295386_375_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b2d85f80 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756379033704295386_375_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,593 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 25)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((191 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((333 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((340 << 6) 
| (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (358 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (365 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23))) { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (387 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (398 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (405 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if 
((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (412 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (416 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (425 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((439 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 10))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((473 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if 
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((490 << 6) | (counter6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((499 << 6) | (counter6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((519 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((526 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter6 == 1)) { + break; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 30))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 18))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(565 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (574 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (583 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (597 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (606 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (610 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (623 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 438 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 2359296, 0, 1792, 2359296, 0, 1808, 2359296, 0, 1808, 2359296, 0, 2496, 1073741824, 0, 2512, 1073741824, 0, 6464, 1024, 0, 7872, 1024, 0, 7888, 1024, 0, 9664, 2181570690, 0, 9664, 2181570690, 0, 9664, 2181570690, 0, 9664, 2181570690, 0, 9664, 2181570690, 0, 9664, 2181570690, 0, 12228, 68174084, 0, 12228, 68174084, 0, 12228, 68174084, 0, 12228, 68174084, 0, 12228, 68174084, 0, 12232, 68174084, 0, 12232, 68174084, 0, 12232, 68174084, 0, 12232, 68174084, 0, 12232, 68174084, 0, 12244, 68174084, 0, 12244, 68174084, 0, 12244, 68174084, 0, 12244, 68174084, 0, 12244, 68174084, 0, 12248, 68174084, 0, 12248, 68174084, 0, 12248, 68174084, 0, 12248, 68174084, 0, 12248, 68174084, 0, 12260, 68174084, 0, 12260, 68174084, 0, 12260, 68174084, 0, 12260, 68174084, 0, 12260, 68174084, 0, 12264, 68174084, 0, 12264, 68174084, 0, 12264, 68174084, 0, 12264, 68174084, 0, 12264, 68174084, 0, 12992, 68174084, 0, 12992, 68174084, 0, 12992, 68174084, 0, 12992, 68174084, 0, 12992, 68174084, 0, 13008, 68174084, 0, 13008, 68174084, 0, 13008, 68174084, 0, 13008, 68174084, 0, 13008, 68174084, 0, 13024, 68174084, 0, 13024, 68174084, 0, 13024, 68174084, 0, 13024, 68174084, 0, 13024, 68174084, 0, 13824, 17, 0, 13824, 17, 0, 28112, 1073741824, 0, 31952, 1088, 0, 31952, 1088, 0, 31956, 1088, 0, 31956, 1088, 0, 31960, 1088, 0, 31960, 1088, 0, 33680, 1140850688, 0, 33680, 1140850688, 0, 36160, 8, 0, 39872, 2147483648, 0, 1792, 2359296, 0, 1792, 2359296, 0, 1808, 2359296, 0, 1808, 
2359296, 0, 2496, 1073741824, 0, 2512, 1073741824, 0, 6464, 1024, 0, 7872, 1024, 0, 7888, 1024, 0, 9664, 2181570690, 0, 9664, 2181570690, 0, 9664, 2181570690, 0, 9664, 2181570690, 0, 9664, 2181570690, 0, 9664, 2181570690, 0, 12228, 68174084, 0, 12228, 68174084, 0, 12228, 68174084, 0, 12228, 68174084, 0, 12228, 68174084, 0, 12232, 68174084, 0, 12232, 68174084, 0, 12232, 68174084, 0, 12232, 68174084, 0, 12232, 68174084, 0, 12244, 68174084, 0, 12244, 68174084, 0, 12244, 68174084, 0, 12244, 68174084, 0, 12244, 68174084, 0, 12248, 68174084, 0, 12248, 68174084, 0, 12248, 68174084, 0, 12248, 68174084, 0, 12248, 68174084, 0, 12260, 68174084, 0, 12260, 68174084, 0, 12260, 68174084, 0, 12260, 68174084, 0, 12260, 68174084, 0, 12264, 68174084, 0, 12264, 68174084, 0, 12264, 68174084, 0, 12264, 68174084, 0, 12264, 68174084, 0, 12992, 68174084, 0, 12992, 68174084, 0, 12992, 68174084, 0, 12992, 68174084, 0, 12992, 68174084, 0, 13008, 68174084, 0, 13008, 68174084, 0, 13008, 68174084, 0, 13008, 68174084, 0, 13008, 68174084, 0, 13024, 68174084, 0, 13024, 68174084, 0, 13024, 68174084, 0, 13024, 68174084, 0, 13024, 68174084, 0, 13824, 17, 0, 13824, 17, 0, 28112, 1073741824, 0, 31952, 1088, 0, 31952, 1088, 0, 31956, 1088, 0, 31956, 1088, 0, 31960, 1088, 0, 31960, 1088, 0, 33680, 1140850688, 0, 33680, 1140850688, 0, 36160, 8, 0, 39872, 2147483648, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756379127448589202_378_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756379127448589202_378_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c6c1014f --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756379127448589202_378_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,442 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((85 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((104 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((156 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 30))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 6) || 
(WaveGetLaneIndex() == 31))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (282 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (327 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (334 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 17))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (360 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((374 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (389 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (394 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (399 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 13)) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 4))) { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((436 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (464 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (471 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((488 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 930 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 136348168, 0, 1344, 136348168, 0, 1344, 136348168, 0, 1344, 136348168, 0, 1344, 136348168, 0, 1360, 136348168, 0, 1360, 136348168, 0, 1360, 136348168, 0, 1360, 136348168, 0, 1360, 136348168, 0, 3456, 136348168, 0, 3456, 136348168, 0, 3456, 136348168, 0, 3456, 136348168, 
0, 3456, 136348168, 0, 3460, 136348168, 0, 3460, 136348168, 0, 3460, 136348168, 0, 3460, 136348168, 0, 3460, 136348168, 0, 3464, 136348168, 0, 3464, 136348168, 0, 3464, 136348168, 0, 3464, 136348168, 0, 3464, 136348168, 0, 3472, 136348168, 0, 3472, 136348168, 0, 3472, 136348168, 0, 3472, 136348168, 0, 3472, 136348168, 0, 3476, 136348168, 0, 3476, 136348168, 0, 3476, 136348168, 0, 3476, 136348168, 0, 3476, 136348168, 0, 3480, 136348168, 0, 3480, 136348168, 0, 3480, 136348168, 0, 3480, 136348168, 0, 3480, 136348168, 0, 4736, 1073741889, 0, 4736, 1073741889, 0, 4736, 1073741889, 0, 4740, 1073741889, 0, 4740, 1073741889, 0, 4740, 1073741889, 0, 4744, 1073741889, 0, 4744, 1073741889, 0, 4744, 1073741889, 0, 4752, 1073741889, 0, 4752, 1073741889, 0, 4752, 1073741889, 0, 4756, 1073741889, 0, 4756, 1073741889, 0, 4756, 1073741889, 0, 4760, 1073741889, 0, 4760, 1073741889, 0, 4760, 1073741889, 0, 5440, 1073741889, 0, 5440, 1073741889, 0, 5440, 1073741889, 0, 5444, 1073741889, 0, 5444, 1073741889, 0, 5444, 1073741889, 0, 5448, 1073741889, 0, 5448, 1073741889, 0, 5448, 1073741889, 0, 5456, 1073741889, 0, 5456, 1073741889, 0, 5456, 1073741889, 0, 5460, 1073741889, 0, 5460, 1073741889, 0, 5460, 1073741889, 0, 5464, 1073741889, 0, 5464, 1073741889, 0, 5464, 1073741889, 0, 6656, 16809984, 0, 6656, 16809984, 0, 6660, 16809984, 0, 6660, 16809984, 0, 6664, 16809984, 0, 6664, 16809984, 0, 6672, 16809984, 0, 6672, 16809984, 0, 6676, 16809984, 0, 6676, 16809984, 0, 6680, 16809984, 0, 6680, 16809984, 0, 7424, 272696336, 0, 7424, 272696336, 0, 7424, 272696336, 0, 7424, 272696336, 0, 7424, 272696336, 0, 15808, 17, 0, 15808, 17, 0, 19776, 2097152, 0, 25216, 537002016, 0, 25216, 537002016, 0, 25216, 537002016, 0, 25536, 1145324612, 0, 25536, 1145324612, 0, 25536, 1145324612, 0, 25536, 1145324612, 0, 25536, 1145324612, 0, 25536, 1145324612, 0, 25536, 1145324612, 0, 25536, 1145324612, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 
0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 1344, 136348168, 0, 1344, 136348168, 0, 1344, 136348168, 0, 1344, 136348168, 0, 1344, 136348168, 0, 1360, 136348168, 0, 1360, 136348168, 0, 1360, 136348168, 0, 1360, 136348168, 0, 1360, 136348168, 0, 3456, 136348168, 0, 3456, 136348168, 0, 3456, 136348168, 0, 3456, 136348168, 0, 3456, 136348168, 0, 3460, 136348168, 0, 3460, 136348168, 0, 3460, 136348168, 0, 3460, 136348168, 0, 3460, 136348168, 0, 3464, 136348168, 0, 3464, 136348168, 0, 3464, 136348168, 0, 3464, 136348168, 0, 3464, 136348168, 0, 3472, 136348168, 0, 3472, 136348168, 0, 3472, 136348168, 0, 3472, 136348168, 0, 3472, 136348168, 0, 3476, 136348168, 0, 3476, 136348168, 0, 3476, 136348168, 0, 3476, 136348168, 0, 3476, 136348168, 0, 3480, 136348168, 0, 3480, 136348168, 0, 3480, 136348168, 0, 3480, 136348168, 0, 3480, 136348168, 0, 4736, 1073741889, 0, 4736, 1073741889, 0, 4736, 1073741889, 0, 4740, 1073741889, 0, 4740, 1073741889, 0, 4740, 1073741889, 0, 4744, 1073741889, 0, 4744, 1073741889, 0, 4744, 1073741889, 0, 4752, 1073741889, 0, 4752, 1073741889, 0, 4752, 1073741889, 
0, 4756, 1073741889, 0, 4756, 1073741889, 0, 4756, 1073741889, 0, 4760, 1073741889, 0, 4760, 1073741889, 0, 4760, 1073741889, 0, 5440, 1073741889, 0, 5440, 1073741889, 0, 5440, 1073741889, 0, 5444, 1073741889, 0, 5444, 1073741889, 0, 5444, 1073741889, 0, 5448, 1073741889, 0, 5448, 1073741889, 0, 5448, 1073741889, 0, 5456, 1073741889, 0, 5456, 1073741889, 0, 5456, 1073741889, 0, 5460, 1073741889, 0, 5460, 1073741889, 0, 5460, 1073741889, 0, 5464, 1073741889, 0, 5464, 1073741889, 0, 5464, 1073741889, 0, 6656, 16809984, 0, 6656, 16809984, 0, 6660, 16809984, 0, 6660, 16809984, 0, 6664, 16809984, 0, 6664, 16809984, 0, 6672, 16809984, 0, 6672, 16809984, 0, 6676, 16809984, 0, 6676, 16809984, 0, 6680, 16809984, 0, 6680, 16809984, 0, 7424, 272696336, 0, 7424, 272696336, 0, 7424, 272696336, 0, 7424, 272696336, 0, 7424, 272696336, 0, 15808, 17, 0, 15808, 17, 0, 19776, 2097152, 0, 25216, 537002016, 0, 25216, 537002016, 0, 25216, 537002016, 0, 25536, 1145324612, 0, 25536, 1145324612, 0, 25536, 1145324612, 0, 25536, 1145324612, 0, 25536, 1145324612, 0, 25536, 1145324612, 0, 25536, 1145324612, 0, 25536, 1145324612, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31232, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31248, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 
0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0, 31264, 2863311530, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756379293917913104_380_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756379293917913104_380_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6e4b02c4 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756379293917913104_380_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,219 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 25))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result 
+ WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 
25))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1488, 1073741824, 0, 1504, 1073741824, 0, 1936, 9, 0, 1936, 9, 0, 1952, 9, 0, 1952, 9, 0, 3216, 64, 0, 3232, 64, 0, 3248, 64, 0, 3920, 64, 0, 3936, 64, 0, 3952, 64, 0, 5824, 2181570690, 0, 5824, 2181570690, 0, 5824, 2181570690, 0, 5824, 2181570690, 0, 5824, 2181570690, 0, 5824, 2181570690, 0, 7360, 65536, 0, 8272, 272629760, 0, 8272, 272629760, 0, 8288, 272629760, 0, 8288, 272629760, 0, 8304, 272629760, 0, 8304, 272629760, 0, 10112, 545392672, 0, 10112, 545392672, 0, 10112, 545392672, 0, 10112, 545392672, 0, 10112, 545392672, 0, 1488, 1073741824, 0, 1504, 1073741824, 0, 1936, 9, 0, 1936, 9, 0, 1952, 9, 0, 1952, 9, 0, 3216, 64, 0, 3232, 64, 0, 3248, 64, 0, 3920, 64, 0, 3936, 64, 0, 3952, 64, 0, 5824, 2181570690, 0, 5824, 2181570690, 0, 5824, 2181570690, 0, 5824, 
2181570690, 0, 5824, 2181570690, 0, 5824, 2181570690, 0, 7360, 65536, 0, 8272, 272629760, 0, 8272, 272629760, 0, 8288, 272629760, 0, 8288, 272629760, 0, 8304, 272629760, 0, 8304, 272629760, 0, 10112, 545392672, 0, 10112, 545392672, 0, 10112, 545392672, 0, 10112, 545392672, 0, 10112, 545392672, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756379299315336389_381_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756379299315336389_381_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c0a54270 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756379299315336389_381_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,120 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756379495229178300_384_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756379495229178300_384_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0e1d727c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756379495229178300_384_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,233 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 26))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 13))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((134 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((143 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 17)) { + if ((WaveGetLaneIndex() == 4)) { 
+ result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3072, 17, 0, 3072, 17, 0, 4800, 537001984, 0, 4800, 537001984, 0, 4816, 537001984, 0, 4816, 537001984, 0, 9152, 131072, 0, 9156, 131072, 0, 9160, 131072, 0, 9168, 131072, 0, 9172, 131072, 0, 9176, 131072, 0, 10944, 8192, 0, 10960, 8192, 0, 11264, 1145324612, 0, 11264, 1145324612, 0, 11264, 1145324612, 0, 11264, 1145324612, 0, 11264, 1145324612, 0, 11264, 1145324612, 0, 11264, 1145324612, 0, 11264, 1145324612, 0, 11712, 559240, 0, 11712, 559240, 0, 11712, 559240, 0, 11712, 559240, 0, 11712, 559240, 0, 12608, 85, 0, 12608, 85, 0, 12608, 85, 0, 12608, 85, 0, 3072, 17, 0, 3072, 17, 0, 
4800, 537001984, 0, 4800, 537001984, 0, 4816, 537001984, 0, 4816, 537001984, 0, 9152, 131072, 0, 9156, 131072, 0, 9160, 131072, 0, 9168, 131072, 0, 9172, 131072, 0, 9176, 131072, 0, 10944, 8192, 0, 10960, 8192, 0, 11264, 1145324612, 0, 11264, 1145324612, 0, 11264, 1145324612, 0, 11264, 1145324612, 0, 11264, 1145324612, 0, 11264, 1145324612, 0, 11264, 1145324612, 0, 11264, 1145324612, 0, 11712, 559240, 0, 11712, 559240, 0, 11712, 559240, 0, 11712, 559240, 0, 11712, 559240, 0, 12608, 85, 0, 12608, 85, 0, 12608, 85, 0, 12608, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756379505671255466_386_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756379505671255466_386_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0eb3356d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756379505671255466_386_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,172 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 
9) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((95 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((107 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((114 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((121 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 300 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1344, 2, 0, 3072, 18, 0, 3072, 18, 0, 3076, 18, 0, 3076, 18, 0, 3088, 18, 0, 3088, 18, 0, 3092, 18, 0, 3092, 18, 0, 3104, 18, 0, 3104, 18, 0, 3108, 18, 0, 3108, 18, 0, 3904, 18, 0, 3904, 18, 0, 3908, 18, 0, 3908, 18, 0, 3920, 18, 0, 3920, 18, 0, 3924, 18, 0, 3924, 18, 0, 3936, 18, 0, 3936, 18, 0, 3940, 18, 0, 3940, 18, 0, 4672, 613566756, 0, 4672, 613566756, 0, 4672, 613566756, 0, 4672, 613566756, 0, 4672, 613566756, 0, 4672, 613566756, 0, 4672, 613566756, 0, 4672, 613566756, 0, 4672, 613566756, 0, 4672, 613566756, 0, 6084, 32768, 0, 6088, 32768, 0, 6092, 32768, 0, 6100, 32768, 0, 6104, 32768, 0, 6108, 32768, 0, 7748, 1073741824, 0, 7752, 1073741824, 0, 7756, 1073741824, 0, 7764, 1073741824, 0, 7768, 1073741824, 0, 7772, 1073741824, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1344, 2, 0, 3072, 18, 0, 3072, 18, 0, 3076, 18, 0, 3076, 18, 0, 3088, 18, 0, 3088, 18, 0, 3092, 18, 0, 3092, 18, 0, 3104, 18, 0, 3104, 18, 0, 3108, 18, 0, 3108, 18, 0, 3904, 18, 0, 3904, 18, 0, 3908, 18, 0, 3908, 18, 0, 3920, 18, 0, 3920, 18, 0, 3924, 18, 0, 3924, 18, 0, 3936, 18, 0, 
3936, 18, 0, 3940, 18, 0, 3940, 18, 0, 4672, 613566756, 0, 4672, 613566756, 0, 4672, 613566756, 0, 4672, 613566756, 0, 4672, 613566756, 0, 4672, 613566756, 0, 4672, 613566756, 0, 4672, 613566756, 0, 4672, 613566756, 0, 4672, 613566756, 0, 6084, 32768, 0, 6088, 32768, 0, 6092, 32768, 0, 6100, 32768, 0, 6104, 32768, 0, 6108, 32768, 0, 7748, 1073741824, 0, 7752, 1073741824, 0, 7756, 1073741824, 0, 7764, 1073741824, 0, 7768, 1073741824, 0, 7772, 1073741824, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756379559209025810_387_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756379559209025810_387_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f1ed9958 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756379559209025810_387_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,681 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 5)) { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 30))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if 
((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 2: { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 25))) { + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((283 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((315 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((330 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } 
+ break; + } + case 3: { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 19))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (368 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((385 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((407 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((426 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((433 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((442 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i5 == 1)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (454 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 28))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (483 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (502 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (509 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (516 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (525 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (530 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (537 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (541 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 30)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (551 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (584 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 26))) { + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (608 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (627 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((645 << 6) | (i6 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (657 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (662 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (667 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (671 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 348 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4480, 64, 0, 5056, 272696320, 0, 5056, 272696320, 0, 5056, 272696320, 0, 5056, 272696320, 0, 7744, 613566752, 0, 7744, 613566752, 0, 7744, 613566752, 0, 7744, 613566752, 0, 7744, 613566752, 0, 7744, 613566752, 0, 7744, 613566752, 0, 7744, 613566752, 0, 7744, 613566752, 0, 9088, 17, 0, 
9088, 17, 0, 9920, 1, 0, 13440, 538050848, 0, 13440, 538050848, 0, 13440, 538050848, 0, 13440, 538050848, 0, 13440, 538050848, 0, 15872, 33554432, 0, 18128, 1879048199, 0, 18128, 1879048199, 0, 18128, 1879048199, 0, 18128, 1879048199, 0, 18128, 1879048199, 0, 18128, 1879048199, 0, 18144, 1879048199, 0, 18144, 1879048199, 0, 18144, 1879048199, 0, 18144, 1879048199, 0, 18144, 1879048199, 0, 18144, 1879048199, 0, 18160, 1879048199, 0, 18160, 1879048199, 0, 18160, 1879048199, 0, 18160, 1879048199, 0, 18160, 1879048199, 0, 18160, 1879048199, 0, 21136, 1, 0, 21152, 1, 0, 21168, 1, 0, 24640, 524416, 0, 24640, 524416, 0, 24656, 524416, 0, 24656, 524416, 0, 29056, 2290124808, 0, 29056, 2290124808, 0, 29056, 2290124808, 0, 29056, 2290124808, 0, 29056, 2290124808, 0, 29056, 2290124808, 0, 34368, 34824, 0, 34368, 34824, 0, 34368, 34824, 0, 42688, 1073741824, 0, 4480, 64, 0, 5056, 272696320, 0, 5056, 272696320, 0, 5056, 272696320, 0, 5056, 272696320, 0, 7744, 613566752, 0, 7744, 613566752, 0, 7744, 613566752, 0, 7744, 613566752, 0, 7744, 613566752, 0, 7744, 613566752, 0, 7744, 613566752, 0, 7744, 613566752, 0, 7744, 613566752, 0, 9088, 17, 0, 9088, 17, 0, 9920, 1, 0, 13440, 538050848, 0, 13440, 538050848, 0, 13440, 538050848, 0, 13440, 538050848, 0, 13440, 538050848, 0, 15872, 33554432, 0, 18128, 1879048199, 0, 18128, 1879048199, 0, 18128, 1879048199, 0, 18128, 1879048199, 0, 18128, 1879048199, 0, 18128, 1879048199, 0, 18144, 1879048199, 0, 18144, 1879048199, 0, 18144, 1879048199, 0, 18144, 1879048199, 0, 18144, 1879048199, 0, 18144, 1879048199, 0, 18160, 1879048199, 0, 18160, 1879048199, 0, 18160, 1879048199, 0, 18160, 1879048199, 0, 18160, 1879048199, 0, 18160, 1879048199, 0, 21136, 1, 0, 21152, 1, 0, 21168, 1, 0, 24640, 524416, 0, 24640, 524416, 0, 24656, 524416, 0, 24656, 524416, 0, 29056, 2290124808, 0, 29056, 2290124808, 0, 29056, 2290124808, 0, 29056, 2290124808, 0, 29056, 2290124808, 0, 29056, 2290124808, 0, 34368, 34824, 0, 34368, 34824, 0, 34368, 34824, 0, 42688, 
1073741824, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756379801378501750_391_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756379801378501750_391_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3c0d33cd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756379801378501750_391_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,185 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 21))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + } else { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 22)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((104 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((115 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((148 << 6) | (counter1 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((counter1 == 2)) { + break; + } + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: 
UInt32 + Stride: 4 + Fill: 0 + Size: 366 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2240, 16, 0, 3456, 16, 0, 3472, 16, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 6672, 4194304, 0, 6676, 4194304, 0, 6688, 4194304, 0, 6692, 4194304, 0, 8464, 268435456, 0, 8480, 268435456, 0, 9492, 7, 0, 9492, 7, 0, 9492, 7, 0, 9496, 7, 0, 9496, 7, 0, 9496, 7, 0, 9508, 7, 0, 9508, 7, 0, 9508, 7, 0, 9512, 7, 0, 9512, 7, 0, 9512, 7, 0, 10880, 604111360, 0, 10880, 604111360, 0, 10880, 604111360, 0, 10880, 604111360, 0, 2240, 16, 0, 3456, 16, 0, 3472, 16, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4816, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 4832, 4290773487, 0, 
4832, 4290773487, 0, 4832, 4290773487, 0, 6672, 4194304, 0, 6676, 4194304, 0, 6688, 4194304, 0, 6692, 4194304, 0, 8464, 268435456, 0, 8480, 268435456, 0, 9492, 7, 0, 9492, 7, 0, 9492, 7, 0, 9496, 7, 0, 9496, 7, 0, 9496, 7, 0, 9508, 7, 0, 9508, 7, 0, 9508, 7, 0, 9512, 7, 0, 9512, 7, 0, 9512, 7, 0, 10880, 604111360, 0, 10880, 604111360, 0, 10880, 604111360, 0, 10880, 604111360, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756379811046597479_392_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756379811046597479_392_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2592fb05 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756379811046597479_392_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,268 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 25))) { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 276 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3968, 134497281, 0, 3968, 134497281, 0, 3968, 134497281, 0, 3968, 134497281, 0, 3968, 134497281, 0, 3584, 3758096390, 0, 3584, 3758096390, 0, 3584, 3758096390, 0, 3584, 3758096390, 0, 3584, 3758096390, 0, 3328, 2048, 0, 3072, 44737192, 0, 3072, 44737192, 0, 3072, 44737192, 0, 3072, 44737192, 0, 3072, 44737192, 0, 3072, 44737192, 0, 3072, 44737192, 0, 3072, 44737192, 0, 3072, 44737192, 0, 3072, 44737192, 0, 3072, 44737192, 0, 2688, 357634384, 0, 2688, 357634384, 0, 2688, 357634384, 0, 2688, 357634384, 0, 2688, 357634384, 0, 2688, 357634384, 0, 2688, 357634384, 0, 2688, 357634384, 0, 2688, 357634384, 0, 2688, 357634384, 0, 4800, 1, 0, 5376, 268501008, 0, 5376, 268501008, 0, 5376, 268501008, 0, 5696, 1048832, 0, 5696, 1048832, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 3968, 134497281, 0, 3968, 134497281, 0, 3968, 134497281, 0, 3968, 134497281, 0, 3968, 134497281, 0, 3584, 3758096390, 0, 3584, 3758096390, 0, 3584, 3758096390, 0, 3584, 3758096390, 0, 3584, 3758096390, 0, 3328, 2048, 0, 3072, 44737192, 0, 3072, 44737192, 0, 3072, 44737192, 0, 3072, 44737192, 0, 3072, 44737192, 0, 3072, 44737192, 0, 3072, 44737192, 0, 3072, 44737192, 0, 3072, 44737192, 0, 3072, 44737192, 0, 3072, 44737192, 0, 2688, 357634384, 0, 2688, 357634384, 0, 2688, 357634384, 0, 2688, 357634384, 0, 2688, 357634384, 0, 2688, 357634384, 0, 2688, 357634384, 0, 
2688, 357634384, 0, 2688, 357634384, 0, 2688, 357634384, 0, 4800, 1, 0, 5376, 268501008, 0, 5376, 268501008, 0, 5376, 268501008, 0, 5696, 1048832, 0, 5696, 1048832, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756379815261638141_393_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756379815261638141_393_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c743612d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756379815261638141_393_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,111 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 
2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 288 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 
2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 2304, 8194, 0, 2304, 8194, 0, 2320, 8194, 0, 2320, 8194, 0, 3460, 2860515338, 0, 3460, 2860515338, 0, 3460, 2860515338, 0, 3460, 2860515338, 0, 3460, 2860515338, 0, 3460, 2860515338, 0, 3460, 2860515338, 0, 3464, 2860515338, 0, 3464, 2860515338, 0, 3464, 2860515338, 0, 3464, 2860515338, 0, 3464, 2860515338, 0, 3464, 2860515338, 0, 3464, 2860515338, 0, 3476, 2860515338, 0, 3476, 2860515338, 0, 3476, 2860515338, 0, 3476, 2860515338, 0, 3476, 2860515338, 0, 3476, 2860515338, 0, 3476, 2860515338, 0, 3480, 2860515338, 0, 3480, 2860515338, 0, 3480, 2860515338, 0, 3480, 2860515338, 0, 3480, 2860515338, 0, 3480, 2860515338, 0, 3480, 2860515338, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 2304, 8194, 0, 2304, 8194, 0, 2320, 8194, 0, 2320, 8194, 0, 3460, 2860515338, 0, 3460, 2860515338, 0, 3460, 2860515338, 0, 3460, 2860515338, 0, 3460, 2860515338, 0, 3460, 2860515338, 0, 3460, 2860515338, 0, 3464, 2860515338, 0, 3464, 2860515338, 0, 3464, 2860515338, 0, 3464, 2860515338, 0, 3464, 2860515338, 0, 3464, 2860515338, 0, 3464, 2860515338, 0, 3476, 2860515338, 0, 3476, 2860515338, 0, 3476, 2860515338, 0, 3476, 2860515338, 0, 3476, 2860515338, 0, 3476, 2860515338, 0, 3476, 2860515338, 0, 3480, 2860515338, 0, 3480, 2860515338, 0, 3480, 2860515338, 0, 3480, 2860515338, 0, 3480, 2860515338, 0, 3480, 2860515338, 0, 3480, 2860515338, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756379816664185750_394_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756379816664185750_394_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..22f1b7a7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756379816664185750_394_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,116 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 17)) { + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3328, 4286578689, 0, 3328, 4286578689, 0, 3328, 4286578689, 0, 3328, 4286578689, 0, 3328, 4286578689, 0, 3328, 4286578689, 0, 3328, 4286578689, 0, 3328, 4286578689, 0, 3328, 4286578689, 0, 3328, 4286578689, 0, 3328, 4286578689, 0, 3328, 4286578689, 0, 3328, 4286578689, 0, 3328, 4286578689, 0, 3328, 4286578689, 0, 3328, 4286578689, 0, 3328, 4286578689, 0, 3328, 4286578689, 0, 3328, 4286578689, 0, 3328, 4286578689, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + 
Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756380095008733439_396_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756380095008733439_396_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0e2ed79b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756380095008733439_396_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,158 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); 
i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((68 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((75 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((94 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((120 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 714 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2304, 545261632, 0, 2304, 545261632, 0, 2304, 545261632, 0, 2304, 545261632, 0, 1920, 3741319168, 0, 1920, 3741319168, 0, 1920, 3741319168, 0, 1920, 3741319168, 0, 1920, 3741319168, 0, 1920, 3741319168, 0, 1920, 3741319168, 0, 3728, 2, 0, 3732, 2, 0, 3736, 2, 0, 3744, 2, 0, 3748, 2, 0, 3752, 2, 0, 3760, 2, 0, 3764, 2, 0, 3768, 2, 0, 6032, 18907200, 0, 6032, 18907200, 0, 6032, 18907200, 0, 6032, 18907200, 0, 6036, 18907200, 0, 6036, 18907200, 0, 6036, 18907200, 0, 6036, 18907200, 0, 6040, 18907200, 0, 6040, 18907200, 0, 6040, 18907200, 0, 6040, 18907200, 0, 6048, 18907200, 0, 6048, 18907200, 0, 6048, 18907200, 0, 6048, 18907200, 0, 6052, 18907200, 0, 6052, 18907200, 0, 6052, 18907200, 0, 6052, 18907200, 0, 6056, 18907200, 0, 6056, 18907200, 0, 6056, 18907200, 0, 6056, 18907200, 0, 6064, 18907200, 0, 6064, 18907200, 0, 6064, 18907200, 0, 6064, 18907200, 0, 6068, 18907200, 0, 6068, 18907200, 0, 6068, 18907200, 0, 6068, 18907200, 0, 6072, 18907200, 0, 6072, 18907200, 0, 6072, 18907200, 0, 
6072, 18907200, 0, 7248, 134758401, 0, 7248, 134758401, 0, 7248, 134758401, 0, 7248, 134758401, 0, 7252, 134758401, 0, 7252, 134758401, 0, 7252, 134758401, 0, 7252, 134758401, 0, 7256, 134758401, 0, 7256, 134758401, 0, 7256, 134758401, 0, 7256, 134758401, 0, 7264, 134758401, 0, 7264, 134758401, 0, 7264, 134758401, 0, 7264, 134758401, 0, 7268, 134758401, 0, 7268, 134758401, 0, 7268, 134758401, 0, 7268, 134758401, 0, 7272, 134758401, 0, 7272, 134758401, 0, 7272, 134758401, 0, 7272, 134758401, 0, 7280, 134758401, 0, 7280, 134758401, 0, 7280, 134758401, 0, 7280, 134758401, 0, 7284, 134758401, 0, 7284, 134758401, 0, 7284, 134758401, 0, 7284, 134758401, 0, 7288, 134758401, 0, 7288, 134758401, 0, 7288, 134758401, 0, 7288, 134758401, 0, 7696, 8, 0, 7700, 8, 0, 7704, 8, 0, 7712, 8, 0, 7716, 8, 0, 7720, 8, 0, 7728, 8, 0, 7732, 8, 0, 7736, 8, 0, 8400, 3758096391, 0, 8400, 3758096391, 0, 8400, 3758096391, 0, 8400, 3758096391, 0, 8400, 3758096391, 0, 8400, 3758096391, 0, 8416, 3758096391, 0, 8416, 3758096391, 0, 8416, 3758096391, 0, 8416, 3758096391, 0, 8416, 3758096391, 0, 8416, 3758096391, 0, 8432, 3758096391, 0, 8432, 3758096391, 0, 8432, 3758096391, 0, 8432, 3758096391, 0, 8432, 3758096391, 0, 8432, 3758096391, 0, 2304, 545261632, 0, 2304, 545261632, 0, 2304, 545261632, 0, 2304, 545261632, 0, 1920, 3741319168, 0, 1920, 3741319168, 0, 1920, 3741319168, 0, 1920, 3741319168, 0, 1920, 3741319168, 0, 1920, 3741319168, 0, 1920, 3741319168, 0, 3728, 2, 0, 3732, 2, 0, 3736, 2, 0, 3744, 2, 0, 3748, 2, 0, 3752, 2, 0, 3760, 2, 0, 3764, 2, 0, 3768, 2, 0, 6032, 18907200, 0, 6032, 18907200, 0, 6032, 18907200, 0, 6032, 18907200, 0, 6036, 18907200, 0, 6036, 18907200, 0, 6036, 18907200, 0, 6036, 18907200, 0, 6040, 18907200, 0, 6040, 18907200, 0, 6040, 18907200, 0, 6040, 18907200, 0, 6048, 18907200, 0, 6048, 18907200, 0, 6048, 18907200, 0, 6048, 18907200, 0, 6052, 18907200, 0, 6052, 18907200, 0, 6052, 18907200, 0, 6052, 18907200, 0, 6056, 18907200, 0, 6056, 18907200, 0, 6056, 18907200, 0, 
6056, 18907200, 0, 6064, 18907200, 0, 6064, 18907200, 0, 6064, 18907200, 0, 6064, 18907200, 0, 6068, 18907200, 0, 6068, 18907200, 0, 6068, 18907200, 0, 6068, 18907200, 0, 6072, 18907200, 0, 6072, 18907200, 0, 6072, 18907200, 0, 6072, 18907200, 0, 7248, 134758401, 0, 7248, 134758401, 0, 7248, 134758401, 0, 7248, 134758401, 0, 7252, 134758401, 0, 7252, 134758401, 0, 7252, 134758401, 0, 7252, 134758401, 0, 7256, 134758401, 0, 7256, 134758401, 0, 7256, 134758401, 0, 7256, 134758401, 0, 7264, 134758401, 0, 7264, 134758401, 0, 7264, 134758401, 0, 7264, 134758401, 0, 7268, 134758401, 0, 7268, 134758401, 0, 7268, 134758401, 0, 7268, 134758401, 0, 7272, 134758401, 0, 7272, 134758401, 0, 7272, 134758401, 0, 7272, 134758401, 0, 7280, 134758401, 0, 7280, 134758401, 0, 7280, 134758401, 0, 7280, 134758401, 0, 7284, 134758401, 0, 7284, 134758401, 0, 7284, 134758401, 0, 7284, 134758401, 0, 7288, 134758401, 0, 7288, 134758401, 0, 7288, 134758401, 0, 7288, 134758401, 0, 7696, 8, 0, 7700, 8, 0, 7704, 8, 0, 7712, 8, 0, 7716, 8, 0, 7720, 8, 0, 7728, 8, 0, 7732, 8, 0, 7736, 8, 0, 8400, 3758096391, 0, 8400, 3758096391, 0, 8400, 3758096391, 0, 8400, 3758096391, 0, 8400, 3758096391, 0, 8400, 3758096391, 0, 8416, 3758096391, 0, 8416, 3758096391, 0, 8416, 3758096391, 0, 8416, 3758096391, 0, 8416, 3758096391, 0, 8416, 3758096391, 0, 8432, 3758096391, 0, 8432, 3758096391, 0, 8432, 3758096391, 0, 8432, 3758096391, 0, 8432, 3758096391, 0, 8432, 3758096391, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756380331543231343_399_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756380331543231343_399_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..100dc904 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756380331543231343_399_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,469 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 22)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 26))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 
2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 
6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 29))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((218 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 20))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((307 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((314 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21)) 
|| (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (333 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (340 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 14)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 25))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (373 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + case 2: { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 31))) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (405 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((421 << 
6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 29))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((461 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter3 == 1)) { + break; + } + } + } + break; + } + case 3: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((490 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 1))) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((528 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 23)) 
|| (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((543 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter5 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((561 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (565 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 348 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4736, 1426063360, 0, 4736, 1426063360, 0, 4736, 1426063360, 0, 4736, 1426063360, 0, 6272, 17, 0, 6272, 17, 0, 7168, 1141130308, 0, 7168, 1141130308, 0, 7168, 1141130308, 0, 7168, 1141130308, 0, 7168, 1141130308, 0, 7168, 1141130308, 0, 7168, 1141130308, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8768, 1427374080, 0, 8768, 
1427374080, 0, 8768, 1427374080, 0, 8768, 1427374080, 0, 8768, 1427374080, 0, 8768, 1427374080, 0, 10192, 2621440, 0, 10192, 2621440, 0, 10208, 2621440, 0, 10208, 2621440, 0, 12880, 32, 0, 12896, 32, 0, 16192, 17, 0, 16192, 17, 0, 31364, 2290089992, 0, 31364, 2290089992, 0, 31364, 2290089992, 0, 31364, 2290089992, 0, 31368, 2290089992, 0, 31368, 2290089992, 0, 31368, 2290089992, 0, 31368, 2290089992, 0, 31380, 2290089992, 0, 31380, 2290089992, 0, 31380, 2290089992, 0, 31380, 2290089992, 0, 31384, 2290089992, 0, 31384, 2290089992, 0, 31384, 2290089992, 0, 31384, 2290089992, 0, 4736, 1426063360, 0, 4736, 1426063360, 0, 4736, 1426063360, 0, 4736, 1426063360, 0, 6272, 17, 0, 6272, 17, 0, 7168, 1141130308, 0, 7168, 1141130308, 0, 7168, 1141130308, 0, 7168, 1141130308, 0, 7168, 1141130308, 0, 7168, 1141130308, 0, 7168, 1141130308, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8320, 1427461461, 0, 8768, 1427374080, 0, 8768, 1427374080, 0, 8768, 1427374080, 0, 8768, 1427374080, 0, 8768, 1427374080, 0, 8768, 1427374080, 0, 10192, 2621440, 0, 10192, 2621440, 0, 10208, 2621440, 0, 10208, 2621440, 0, 12880, 32, 0, 12896, 32, 0, 16192, 17, 0, 16192, 17, 0, 31364, 2290089992, 0, 31364, 2290089992, 0, 31364, 2290089992, 0, 31364, 2290089992, 0, 31368, 2290089992, 0, 31368, 2290089992, 0, 31368, 2290089992, 0, 31368, 2290089992, 0, 31380, 2290089992, 0, 31380, 2290089992, 0, 31380, 2290089992, 0, 31380, 2290089992, 0, 31384, 2290089992, 0, 31384, 2290089992, 0, 31384, 2290089992, 0, 31384, 2290089992, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756380374720289015_400_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756380374720289015_400_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f7f299f6 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756380374720289015_400_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,279 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 27))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 29))) { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((213 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((223 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((232 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((236 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((243 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || 
(WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 204 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4096, 1342177281, 0, 4096, 1342177281, 0, 4096, 1342177281, 0, 11584, 278532, 0, 11584, 278532, 0, 11584, 278532, 0, 12752, 2684354562, 0, 12752, 2684354562, 0, 12752, 2684354562, 0, 12768, 2684354562, 0, 12768, 2684354562, 0, 12768, 2684354562, 0, 15572, 32768, 0, 15576, 32768, 0, 15580, 32768, 0, 15588, 32768, 0, 15592, 32768, 0, 15596, 32768, 0, 16272, 2860515330, 0, 16272, 2860515330, 0, 16272, 2860515330, 0, 16272, 2860515330, 0, 16272, 2860515330, 0, 16272, 2860515330, 0, 16288, 2860515330, 0, 16288, 2860515330, 0, 16288, 2860515330, 0, 16288, 2860515330, 0, 16288, 2860515330, 0, 16288, 2860515330, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4096, 1342177281, 0, 4096, 1342177281, 0, 4096, 1342177281, 0, 11584, 278532, 0, 11584, 278532, 0, 11584, 278532, 0, 12752, 2684354562, 0, 12752, 2684354562, 0, 12752, 2684354562, 0, 12768, 2684354562, 0, 12768, 2684354562, 0, 12768, 2684354562, 0, 15572, 32768, 0, 15576, 32768, 0, 15580, 32768, 0, 15588, 32768, 0, 15592, 32768, 0, 15596, 32768, 0, 16272, 2860515330, 0, 16272, 2860515330, 0, 16272, 2860515330, 0, 16272, 2860515330, 0, 16272, 2860515330, 0, 16272, 2860515330, 0, 16288, 2860515330, 0, 16288, 2860515330, 0, 16288, 2860515330, 0, 16288, 2860515330, 0, 16288, 2860515330, 0, 16288, 2860515330, 
0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756380430802687885_402_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756380430802687885_402_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2c9c202e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756380430802687885_402_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,153 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter0 = 0; + 
while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 31)) { + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((95 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 2192, 572662306, 0, 2192, 572662306, 0, 2192, 572662306, 0, 2192, 572662306, 0, 2192, 572662306, 0, 2192, 572662306, 0, 2192, 572662306, 0, 2192, 572662306, 0, 2208, 572662306, 0, 2208, 572662306, 0, 2208, 572662306, 0, 2208, 572662306, 0, 2208, 572662306, 0, 2208, 572662306, 0, 2208, 572662306, 0, 2208, 572662306, 0, 2224, 572662306, 0, 2224, 572662306, 0, 2224, 572662306, 0, 2224, 572662306, 0, 2224, 572662306, 0, 2224, 572662306, 0, 2224, 572662306, 0, 2224, 572662306, 0, 4672, 2147483648, 0, 4688, 2147483648, 0, 4704, 2147483648, 0, 576, 17, 0, 576, 17, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 2192, 572662306, 0, 2192, 572662306, 0, 2192, 572662306, 0, 2192, 572662306, 0, 2192, 572662306, 0, 2192, 572662306, 0, 2192, 572662306, 0, 2192, 572662306, 0, 2208, 572662306, 0, 2208, 572662306, 0, 2208, 572662306, 0, 2208, 
572662306, 0, 2208, 572662306, 0, 2208, 572662306, 0, 2208, 572662306, 0, 2208, 572662306, 0, 2224, 572662306, 0, 2224, 572662306, 0, 2224, 572662306, 0, 2224, 572662306, 0, 2224, 572662306, 0, 2224, 572662306, 0, 2224, 572662306, 0, 2224, 572662306, 0, 4672, 2147483648, 0, 4688, 2147483648, 0, 4704, 2147483648, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756380435293349652_403_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756380435293349652_403_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f27119b4 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756380435293349652_403_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,309 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((27 << 6) | 
(i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + case 3: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((173 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((183 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((192 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((197 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((204 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((208 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((215 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 372 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1732, 1, 0, 1736, 1, 0, 1748, 1, 0, 1752, 1, 0, 1764, 1, 0, 1768, 1, 0, 2308, 268501008, 0, 2308, 268501008, 0, 2308, 268501008, 0, 2312, 268501008, 0, 2312, 268501008, 0, 2312, 268501008, 0, 2324, 268501008, 0, 2324, 268501008, 0, 2324, 268501008, 0, 2328, 268501008, 0, 2328, 268501008, 0, 2328, 268501008, 0, 2340, 268501008, 0, 2340, 268501008, 0, 2340, 268501008, 0, 2344, 268501008, 0, 2344, 268501008, 0, 2344, 268501008, 0, 2628, 1048832, 0, 2628, 1048832, 0, 2632, 1048832, 0, 2632, 1048832, 0, 2644, 1048832, 
0, 2644, 1048832, 0, 2648, 1048832, 0, 2648, 1048832, 0, 2660, 1048832, 0, 2660, 1048832, 0, 2664, 1048832, 0, 2664, 1048832, 0, 4228, 16, 0, 4232, 16, 0, 4244, 16, 0, 4248, 16, 0, 4260, 16, 0, 4264, 16, 0, 10192, 134217728, 0, 10208, 134217728, 0, 13076, 559240, 0, 13076, 559240, 0, 13076, 559240, 0, 13076, 559240, 0, 13076, 559240, 0, 13092, 559240, 0, 13092, 559240, 0, 13092, 559240, 0, 13092, 559240, 0, 13092, 559240, 0, 13780, 2290614272, 0, 13780, 2290614272, 0, 13780, 2290614272, 0, 13780, 2290614272, 0, 13796, 2290614272, 0, 13796, 2290614272, 0, 13796, 2290614272, 0, 13796, 2290614272, 0, 1732, 1, 0, 1736, 1, 0, 1748, 1, 0, 1752, 1, 0, 1764, 1, 0, 1768, 1, 0, 2308, 268501008, 0, 2308, 268501008, 0, 2308, 268501008, 0, 2312, 268501008, 0, 2312, 268501008, 0, 2312, 268501008, 0, 2324, 268501008, 0, 2324, 268501008, 0, 2324, 268501008, 0, 2328, 268501008, 0, 2328, 268501008, 0, 2328, 268501008, 0, 2340, 268501008, 0, 2340, 268501008, 0, 2340, 268501008, 0, 2344, 268501008, 0, 2344, 268501008, 0, 2344, 268501008, 0, 2628, 1048832, 0, 2628, 1048832, 0, 2632, 1048832, 0, 2632, 1048832, 0, 2644, 1048832, 0, 2644, 1048832, 0, 2648, 1048832, 0, 2648, 1048832, 0, 2660, 1048832, 0, 2660, 1048832, 0, 2664, 1048832, 0, 2664, 1048832, 0, 4228, 16, 0, 4232, 16, 0, 4244, 16, 0, 4248, 16, 0, 4260, 16, 0, 4264, 16, 0, 10192, 134217728, 0, 10208, 134217728, 0, 13076, 559240, 0, 13076, 559240, 0, 13076, 559240, 0, 13076, 559240, 0, 13076, 559240, 0, 13092, 559240, 0, 13092, 559240, 0, 13092, 559240, 0, 13092, 559240, 0, 13092, 559240, 0, 13780, 2290614272, 0, 13780, 2290614272, 0, 13780, 2290614272, 0, 13780, 2290614272, 0, 13796, 2290614272, 0, 13796, 2290614272, 0, 13796, 2290614272, 0, 13796, 2290614272, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756380475618885578_404_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756380475618885578_404_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bc6c7e8e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756380475618885578_404_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,178 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* records of 3 words: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0]: next free word of _participant_bit, advanced by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 22))) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 19)) || 
(WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 282 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2240, 8, 0, 2256, 8, 0, 2272, 8, 0, 2944, 32768, 0, 2960, 32768, 0, 2976, 32768, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 7808, 1048576, 0, 8832, 85, 0, 8832, 85, 0, 8832, 85, 0, 8832, 85, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 
9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2240, 8, 0, 2256, 8, 0, 2272, 8, 0, 2944, 32768, 0, 2960, 32768, 0, 2976, 32768, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 8192, 2863311530, 0, 7808, 1048576, 0, 8832, 85, 0, 8832, 85, 0, 8832, 85, 0, 8832, 85, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0, 9408, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756380477601141426_405_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756380477601141426_405_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0ec8ab0e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756380477601141426_405_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,114 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* records of 3 words: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0]: next free word of _participant_bit, advanced by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 228 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 4261412864, 0, 1216, 4261412864, 0, 1216, 4261412864, 0, 1216, 4261412864, 0, 1216, 4261412864, 0, 1216, 4261412864, 0, 1216, 4261412864, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 1856, 73, 0, 1856, 73, 0, 1856, 73, 0, 2432, 272696336, 0, 2432, 272696336, 0, 2432, 272696336, 0, 2432, 272696336, 0, 2432, 272696336, 0, 2752, 613566756, 0, 2752, 613566756, 0, 2752, 613566756, 0, 2752, 613566756, 0, 2752, 613566756, 0, 2752, 613566756, 0, 2752, 613566756, 0, 2752, 613566756, 0, 2752, 613566756, 0, 2752, 613566756, 0, 1216, 4261412864, 0, 1216, 4261412864, 0, 1216, 4261412864, 0, 1216, 4261412864, 0, 1216, 4261412864, 0, 1216, 4261412864, 0, 1216, 4261412864, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 832, 22369621, 0, 1856, 73, 0, 1856, 73, 0, 1856, 73, 0, 2432, 272696336, 0, 2432, 272696336, 0, 2432, 272696336, 0, 2432, 272696336, 0, 2432, 272696336, 0, 2752, 
613566756, 0, 2752, 613566756, 0, 2752, 613566756, 0, 2752, 613566756, 0, 2752, 613566756, 0, 2752, 613566756, 0, 2752, 613566756, 0, 2752, 613566756, 0, 2752, 613566756, 0, 2752, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756380478073433062_406_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756380478073433062_406_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..eec187c9 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756380478073433062_406_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,187 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* records of 3 words: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0]: next free word of _participant_bit, advanced by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 16)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() 
>= 22))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 
6))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 4681, 0, 768, 4681, 0, 768, 4681, 0, 768, 4681, 0, 768, 4681, 0, 1936, 9, 0, 1936, 9, 0, 1952, 9, 0, 1952, 9, 0, 1968, 9, 0, 1968, 9, 0, 2368, 9, 0, 2368, 9, 0, 
4048, 2, 0, 4064, 2, 0, 8576, 537004064, 0, 8576, 537004064, 0, 8576, 537004064, 0, 8576, 537004064, 0, 8592, 537004064, 0, 8592, 537004064, 0, 8592, 537004064, 0, 8592, 537004064, 0, 8608, 537004064, 0, 8608, 537004064, 0, 8608, 537004064, 0, 8608, 537004064, 0, 9280, 536870948, 0, 9280, 536870948, 0, 9280, 536870948, 0, 9296, 536870948, 0, 9296, 536870948, 0, 9296, 536870948, 0, 9312, 536870948, 0, 9312, 536870948, 0, 9312, 536870948, 0, 768, 4681, 0, 768, 4681, 0, 768, 4681, 0, 768, 4681, 0, 768, 4681, 0, 1936, 9, 0, 1936, 9, 0, 1952, 9, 0, 1952, 9, 0, 1968, 9, 0, 1968, 9, 0, 2368, 9, 0, 2368, 9, 0, 4048, 2, 0, 4064, 2, 0, 8576, 537004064, 0, 8576, 537004064, 0, 8576, 537004064, 0, 8576, 537004064, 0, 8592, 537004064, 0, 8592, 537004064, 0, 8592, 537004064, 0, 8592, 537004064, 0, 8608, 537004064, 0, 8608, 537004064, 0, 8608, 537004064, 0, 8608, 537004064, 0, 9280, 536870948, 0, 9280, 536870948, 0, 9280, 536870948, 0, 9296, 536870948, 0, 9296, 536870948, 0, 9296, 536870948, 0, 9312, 536870948, 0, 9312, 536870948, 0, 9312, 536870948, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756380483887584209_407_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756380483887584209_407_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9d839f76 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756380483887584209_407_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,139 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* records of 3 words: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0]: next free word of _participant_bit, advanced by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 25)) { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + 
case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 168 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1488, 3758096384, 0, 1488, 3758096384, 0, 1488, 3758096384, 0, 1504, 3758096384, 0, 1504, 3758096384, 0, 1504, 3758096384, 0, 2768, 2147483648, 0, 2784, 2147483648, 0, 3984, 268435456, 0, 4000, 268435456, 0, 4304, 603979776, 0, 4304, 603979776, 0, 4320, 603979776, 0, 4320, 603979776, 0, 5008, 
4261412864, 0, 5008, 4261412864, 0, 5008, 4261412864, 0, 5008, 4261412864, 0, 5008, 4261412864, 0, 5008, 4261412864, 0, 5008, 4261412864, 0, 5024, 4261412864, 0, 5024, 4261412864, 0, 5024, 4261412864, 0, 5024, 4261412864, 0, 5024, 4261412864, 0, 5024, 4261412864, 0, 5024, 4261412864, 0, 1488, 3758096384, 0, 1488, 3758096384, 0, 1488, 3758096384, 0, 1504, 3758096384, 0, 1504, 3758096384, 0, 1504, 3758096384, 0, 2768, 2147483648, 0, 2784, 2147483648, 0, 3984, 268435456, 0, 4000, 268435456, 0, 4304, 603979776, 0, 4304, 603979776, 0, 4320, 603979776, 0, 4320, 603979776, 0, 5008, 4261412864, 0, 5008, 4261412864, 0, 5008, 4261412864, 0, 5008, 4261412864, 0, 5008, 4261412864, 0, 5008, 4261412864, 0, 5008, 4261412864, 0, 5024, 4261412864, 0, 5024, 4261412864, 0, 5024, 4261412864, 0, 5024, 4261412864, 0, 5024, 4261412864, 0, 5024, 4261412864, 0, 5024, 4261412864, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756380484509425908_408_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756380484509425908_408_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..17ace84a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756380484509425908_408_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,80 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* records of 3 words: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0]: next free word of _participant_bit, advanced by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 # same shape as expected_bit_patterns; no branch of this shader can record + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756380506688072500_410_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756380506688072500_410_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cf2e874e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756380506688072500_410_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,199 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* records of 3 words: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0]: next free word of _participant_bit, advanced by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((39 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((68 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 25))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 29)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((171 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 1)) { + continue; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main 
+ DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 324 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 3648, 2147483648, 0, 3652, 2147483648, 0, 3664, 2147483648, 0, 3668, 2147483648, 0, 3680, 2147483648, 0, 3684, 2147483648, 0, 4352, 2415919122, 0, 4352, 2415919122, 0, 4352, 2415919122, 0, 4352, 2415919122, 0, 4356, 2415919122, 0, 4356, 2415919122, 0, 4356, 2415919122, 0, 4356, 2415919122, 0, 4368, 2415919122, 0, 4368, 2415919122, 0, 4368, 2415919122, 0, 4368, 2415919122, 0, 4372, 2415919122, 0, 4372, 2415919122, 0, 4372, 2415919122, 0, 4372, 2415919122, 0, 4384, 2415919122, 0, 4384, 2415919122, 0, 4384, 2415919122, 0, 4384, 2415919122, 0, 4388, 2415919122, 0, 4388, 2415919122, 0, 4388, 2415919122, 0, 4388, 2415919122, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 3648, 2147483648, 0, 3652, 2147483648, 0, 3664, 2147483648, 0, 3668, 2147483648, 0, 3680, 2147483648, 0, 3684, 2147483648, 0, 4352, 2415919122, 0, 4352, 2415919122, 0, 4352, 2415919122, 0, 4352, 2415919122, 0, 4356, 2415919122, 0, 4356, 2415919122, 0, 4356, 2415919122, 0, 4356, 2415919122, 0, 4368, 2415919122, 0, 4368, 2415919122, 0, 4368, 2415919122, 0, 4368, 2415919122, 0, 4372, 2415919122, 0, 4372, 2415919122, 0, 4372, 2415919122, 0, 4372, 2415919122, 0, 4384, 2415919122, 0, 4384, 2415919122, 0, 4384, 2415919122, 0, 4384, 2415919122, 0, 4388, 2415919122, 0, 4388, 2415919122, 0, 4388, 2415919122, 0, 4388, 2415919122, 0, 5504, 
3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0, 5504, 3067833782, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756380515413745969_411_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756380515413745969_411_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87dc0cb2 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756380515413745969_411_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,255 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 26)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16)) 
|| (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 30))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 20)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
break; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 318 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 268435457, 0, 1296, 268435457, 0, 1312, 268435457, 0, 1312, 268435457, 0, 3216, 268435729, 0, 3216, 268435729, 0, 3216, 268435729, 0, 3216, 268435729, 0, 3232, 268435729, 0, 3232, 268435729, 0, 3232, 268435729, 0, 3232, 268435729, 0, 4624, 536870912, 0, 4640, 536870912, 0, 4656, 536870912, 0, 9280, 1145324612, 0, 9280, 1145324612, 0, 9280, 1145324612, 0, 9280, 1145324612, 0, 9280, 1145324612, 0, 9280, 1145324612, 0, 9280, 1145324612, 0, 9280, 1145324612, 0, 9728, 559240, 0, 9728, 559240, 0, 9728, 559240, 0, 9728, 559240, 0, 9728, 559240, 0, 10880, 3221225475, 0, 10880, 3221225475, 0, 10880, 3221225475, 0, 10880, 3221225475, 0, 11520, 73, 0, 11520, 73, 0, 11520, 73, 0, 12608, 18, 0, 12608, 18, 0, 13504, 536870944, 0, 13504, 536870944, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 1296, 268435457, 0, 1296, 268435457, 0, 1312, 268435457, 0, 1312, 268435457, 0, 3216, 268435729, 0, 3216, 268435729, 0, 3216, 268435729, 0, 3216, 268435729, 0, 3232, 268435729, 0, 3232, 268435729, 0, 3232, 268435729, 0, 3232, 268435729, 0, 4624, 536870912, 0, 4640, 536870912, 0, 4656, 
536870912, 0, 9280, 1145324612, 0, 9280, 1145324612, 0, 9280, 1145324612, 0, 9280, 1145324612, 0, 9280, 1145324612, 0, 9280, 1145324612, 0, 9280, 1145324612, 0, 9280, 1145324612, 0, 9728, 559240, 0, 9728, 559240, 0, 9728, 559240, 0, 9728, 559240, 0, 9728, 559240, 0, 10880, 3221225475, 0, 10880, 3221225475, 0, 10880, 3221225475, 0, 10880, 3221225475, 0, 11520, 73, 0, 11520, 73, 0, 11520, 73, 0, 12608, 18, 0, 12608, 18, 0, 13504, 536870944, 0, 13504, 536870944, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0, 14464, 4278190143, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756380524864319206_412_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756380524864319206_412_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3e46b356 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756380524864319206_412_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,154 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = 
(result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 30))) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 264 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4624, 536903680, 0, 4624, 536903680, 0, 4640, 536903680, 0, 4640, 536903680, 0, 7696, 1073742165, 0, 7696, 1073742165, 0, 7696, 1073742165, 0, 7696, 1073742165, 0, 7696, 1073742165, 0, 7696, 1073742165, 0, 7712, 1073742165, 0, 7712, 1073742165, 0, 7712, 1073742165, 0, 7712, 1073742165, 0, 7712, 1073742165, 0, 7712, 1073742165, 0, 7728, 1073742165, 0, 7728, 1073742165, 0, 7728, 1073742165, 0, 7728, 1073742165, 0, 7728, 1073742165, 0, 7728, 1073742165, 0, 8272, 2147484330, 0, 8272, 2147484330, 0, 8272, 2147484330, 0, 8272, 2147484330, 0, 8272, 2147484330, 0, 8272, 2147484330, 0, 8288, 2147484330, 0, 8288, 2147484330, 0, 8288, 2147484330, 0, 8288, 2147484330, 0, 8288, 2147484330, 0, 8288, 2147484330, 0, 8304, 2147484330, 0, 8304, 2147484330, 0, 8304, 2147484330, 0, 8304, 2147484330, 0, 8304, 2147484330, 0, 8304, 2147484330, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4624, 536903680, 0, 4624, 536903680, 0, 4640, 536903680, 0, 4640, 536903680, 0, 7696, 1073742165, 0, 7696, 1073742165, 0, 7696, 1073742165, 0, 7696, 1073742165, 0, 7696, 1073742165, 0, 7696, 1073742165, 0, 7712, 1073742165, 0, 7712, 1073742165, 0, 7712, 1073742165, 0, 7712, 1073742165, 0, 7712, 1073742165, 0, 7712, 1073742165, 0, 7728, 1073742165, 0, 7728, 1073742165, 0, 7728, 1073742165, 0, 7728, 1073742165, 0, 7728, 1073742165, 0, 7728, 1073742165, 0, 8272, 2147484330, 0, 8272, 2147484330, 0, 8272, 2147484330, 0, 8272, 2147484330, 0, 8272, 2147484330, 0, 8272, 2147484330, 0, 8288, 2147484330, 0, 8288, 2147484330, 0, 8288, 2147484330, 0, 8288, 2147484330, 0, 8288, 
2147484330, 0, 8288, 2147484330, 0, 8304, 2147484330, 0, 8304, 2147484330, 0, 8304, 2147484330, 0, 8304, 2147484330, 0, 8304, 2147484330, 0, 8304, 2147484330, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756380557238792581_414_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756380557238792581_414_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b9ddf71d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756380557238792581_414_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,222 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 24))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 
0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 10)) || 
(WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 372 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4672, 272696336, 0, 4672, 272696336, 0, 4672, 272696336, 0, 4672, 272696336, 0, 4672, 272696336, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 5632, 17, 0, 5632, 17, 0, 7872, 3146240, 0, 7872, 3146240, 0, 7872, 3146240, 0, 7876, 3146240, 0, 7876, 3146240, 0, 7876, 3146240, 0, 7888, 3146240, 0, 7888, 3146240, 0, 7888, 3146240, 0, 7892, 3146240, 0, 7892, 3146240, 0, 7892, 3146240, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8640, 559240, 0, 8640, 559240, 0, 8640, 559240, 0, 8640, 559240, 0, 8640, 559240, 0, 9536, 85, 0, 9536, 85, 0, 9536, 85, 0, 9536, 85, 0, 4672, 272696336, 0, 4672, 272696336, 0, 4672, 272696336, 0, 4672, 272696336, 0, 4672, 272696336, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 
0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 5632, 17, 0, 5632, 17, 0, 7872, 3146240, 0, 7872, 3146240, 0, 7872, 3146240, 0, 7876, 3146240, 0, 7876, 3146240, 0, 7876, 3146240, 0, 7888, 3146240, 0, 7888, 3146240, 0, 7888, 3146240, 0, 7892, 3146240, 0, 7892, 3146240, 0, 7892, 3146240, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8192, 2004318071, 0, 8640, 559240, 0, 8640, 559240, 0, 8640, 559240, 0, 8640, 559240, 0, 8640, 559240, 0, 9536, 85, 0, 9536, 85, 0, 9536, 85, 0, 9536, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756380569211829016_415_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756380569211829016_415_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c383dd09 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756380569211829016_415_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,302 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(4)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21))) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 21)) { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 336 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3520, 272630018, 0, 3520, 272630018, 0, 3520, 272630018, 0, 3520, 272630018, 0, 3264, 2048, 0, 3008, 2147615760, 0, 3008, 2147615760, 0, 3008, 2147615760, 0, 3008, 2147615760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 6016, 16, 0, 8192, 65, 0, 8192, 65, 0, 8768, 272696320, 0, 8768, 272696320, 0, 8768, 272696320, 0, 8768, 272696320, 0, 9088, 340854020, 0, 9088, 340854020, 0, 9088, 340854020, 0, 9088, 340854020, 0, 9088, 340854020, 0, 9088, 340854020, 0, 9088, 340854020, 0, 9088, 340854020, 0, 10048, 1409286149, 0, 10048, 1409286149, 0, 10048, 1409286149, 0, 10048, 1409286149, 0, 10048, 1409286149, 0, 11520, 73, 0, 11520, 73, 0, 11520, 73, 0, 12096, 272696336, 0, 12096, 272696336, 0, 12096, 272696336, 0, 12096, 272696336, 0, 12096, 272696336, 0, 13776, 1065220, 0, 13776, 1065220, 0, 13776, 1065220, 0, 13776, 1065220, 0, 14352, 133152, 0, 14352, 133152, 0, 14352, 133152, 0, 3520, 272630018, 0, 3520, 272630018, 0, 3520, 272630018, 0, 3520, 272630018, 0, 3264, 2048, 0, 3008, 2147615760, 0, 3008, 2147615760, 0, 3008, 2147615760, 0, 3008, 2147615760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 2624, 
715694760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 2624, 715694760, 0, 6016, 16, 0, 8192, 65, 0, 8192, 65, 0, 8768, 272696320, 0, 8768, 272696320, 0, 8768, 272696320, 0, 8768, 272696320, 0, 9088, 340854020, 0, 9088, 340854020, 0, 9088, 340854020, 0, 9088, 340854020, 0, 9088, 340854020, 0, 9088, 340854020, 0, 9088, 340854020, 0, 9088, 340854020, 0, 10048, 1409286149, 0, 10048, 1409286149, 0, 10048, 1409286149, 0, 10048, 1409286149, 0, 10048, 1409286149, 0, 11520, 73, 0, 11520, 73, 0, 11520, 73, 0, 12096, 272696336, 0, 12096, 272696336, 0, 12096, 272696336, 0, 12096, 272696336, 0, 12096, 272696336, 0, 13776, 1065220, 0, 13776, 1065220, 0, 13776, 1065220, 0, 13776, 1065220, 0, 14352, 133152, 0, 14352, 133152, 0, 14352, 133152, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756381296732966331_418_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756381296732966331_418_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e890ef5b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756381296732966331_418_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + 
Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3072, 2149613601, 0, 3072, 2149613601, 0, 3072, 2149613601, 0, 3072, 2149613601, 0, 3072, 2149613601, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2304, 536870912, 0, 3072, 2149613601, 0, 3072, 2149613601, 0, 3072, 2149613601, 0, 3072, 2149613601, 0, 3072, 2149613601, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2688, 1431655764, 0, 2304, 536870912, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756381307348090385_420_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756381307348090385_420_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..346b6f6d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756381307348090385_420_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,187 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1616, 18, 0, 1616, 18, 0, 1632, 18, 0, 1632, 18, 0, 1648, 18, 0, 1648, 18, 0, 2644, 16, 0, 2660, 16, 0, 2676, 16, 0, 3136, 613566756, 0, 3136, 613566756, 0, 3136, 613566756, 0, 3136, 613566756, 0, 3136, 613566756, 0, 3136, 613566756, 0, 3136, 613566756, 0, 3136, 613566756, 0, 3136, 613566756, 0, 3136, 613566756, 0, 3776, 73, 0, 3776, 73, 0, 3776, 73, 0, 4352, 272696336, 0, 4352, 272696336, 0, 4352, 272696336, 0, 4352, 272696336, 0, 4352, 272696336, 0, 6288, 32, 0, 6304, 32, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1616, 18, 0, 1616, 18, 0, 1632, 18, 0, 1632, 18, 0, 1648, 18, 0, 1648, 18, 0, 2644, 16, 0, 2660, 16, 0, 2676, 16, 0, 3136, 613566756, 0, 3136, 613566756, 0, 3136, 613566756, 0, 3136, 613566756, 0, 3136, 613566756, 0, 3136, 613566756, 0, 3136, 613566756, 0, 3136, 613566756, 0, 3136, 613566756, 0, 3136, 613566756, 0, 3776, 73, 0, 3776, 73, 0, 3776, 73, 0, 4352, 272696336, 0, 4352, 272696336, 
0, 4352, 272696336, 0, 4352, 272696336, 0, 4352, 272696336, 0, 6288, 32, 0, 6304, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756381312013157634_421_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756381312013157634_421_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6bb479ae --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756381312013157634_421_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,225 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((121 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1664, 1048832, 0, 1664, 1048832, 0, 3712, 64, 0, 3728, 64, 0, 3744, 64, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6672, 1145324612, 0, 6672, 1145324612, 0, 6672, 1145324612, 0, 6672, 1145324612, 0, 6672, 1145324612, 0, 6672, 1145324612, 0, 6672, 1145324612, 0, 6672, 1145324612, 0, 6688, 1145324612, 0, 6688, 1145324612, 0, 6688, 1145324612, 0, 6688, 1145324612, 0, 6688, 1145324612, 0, 6688, 1145324612, 0, 6688, 1145324612, 0, 6688, 1145324612, 0, 8704, 64, 0, 8720, 64, 0, 8736, 64, 0, 9152, 559240, 0, 9152, 559240, 0, 9152, 559240, 0, 9152, 559240, 0, 9152, 559240, 0, 768, 1, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1664, 1048832, 0, 1664, 1048832, 0, 3712, 64, 0, 3728, 64, 0, 3744, 64, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6656, 1145324612, 0, 6672, 1145324612, 0, 6672, 1145324612, 0, 6672, 1145324612, 0, 6672, 1145324612, 0, 6672, 1145324612, 0, 6672, 1145324612, 0, 6672, 1145324612, 0, 6672, 1145324612, 0, 6688, 1145324612, 0, 6688, 
1145324612, 0, 6688, 1145324612, 0, 6688, 1145324612, 0, 6688, 1145324612, 0, 6688, 1145324612, 0, 6688, 1145324612, 0, 6688, 1145324612, 0, 8704, 64, 0, 8720, 64, 0, 8736, 64, 0, 9152, 559240, 0, 9152, 559240, 0, 9152, 559240, 0, 9152, 559240, 0, 9152, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756381704241555845_424_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756381704241555845_424_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e1b45d88 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756381704241555845_424_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,153 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 
27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2752, 4194308, 0, 2752, 4194308, 0, 576, 17, 0, 576, 17, 0, 2752, 4194308, 0, 2752, 4194308, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756381704571873856_425_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756381704571873856_425_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..12d00ed2 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756381704571873856_425_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,294 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 19))) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 31)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((191 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() 
== 17))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- 
+Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 8128, 512, 0, 10304, 536871458, 0, 10304, 536871458, 0, 10304, 536871458, 0, 10304, 536871458, 0, 10320, 536871458, 0, 10320, 536871458, 0, 10320, 536871458, 0, 10320, 536871458, 0, 13056, 131072, 0, 13376, 1145324612, 0, 13376, 1145324612, 0, 13376, 1145324612, 0, 13376, 1145324612, 0, 13376, 1145324612, 0, 13376, 1145324612, 0, 13376, 1145324612, 0, 13376, 1145324612, 0, 14016, 8, 0, 14912, 8390656, 0, 14912, 8390656, 0, 576, 17, 0, 576, 17, 0, 8128, 512, 0, 10304, 536871458, 0, 10304, 536871458, 0, 10304, 536871458, 0, 10304, 536871458, 0, 10320, 536871458, 0, 10320, 536871458, 0, 10320, 536871458, 0, 10320, 536871458, 0, 13056, 131072, 0, 13376, 1145324612, 0, 13376, 1145324612, 0, 13376, 1145324612, 0, 13376, 1145324612, 0, 13376, 1145324612, 0, 13376, 1145324612, 0, 13376, 1145324612, 0, 13376, 1145324612, 0, 14016, 8, 0, 14912, 8390656, 0, 14912, 8390656, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756381756923960358_429_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756381756923960358_429_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..70df8e41 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756381756923960358_429_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,146 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 204 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 
0, 1920, 559240, 0, 1920, 559240, 0, 4416, 67110912, 0, 4416, 67110912, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 3776, 1073741825, 0, 3776, 1073741825, 0, 576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 4416, 67110912, 0, 4416, 67110912, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 4032, 2863309482, 0, 3776, 1073741825, 0, 3776, 1073741825, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756381757632244863_430_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756381757632244863_430_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d8b00f9f --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756381757632244863_430_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4096, 3221225475, 0, 4096, 3221225475, 0, 4096, 3221225475, 0, 4096, 3221225475, 0, 3840, 4196352, 0, 3840, 4196352, 0, 3584, 33816832, 0, 3584, 33816832, 0, 3584, 33816832, 0, 3200, 4, 0, 2816, 805306368, 0, 2816, 805306368, 0, 4096, 3221225475, 0, 4096, 3221225475, 0, 4096, 3221225475, 0, 4096, 3221225475, 0, 3840, 4196352, 0, 3840, 4196352, 0, 3584, 33816832, 0, 3584, 33816832, 0, 3584, 33816832, 0, 3200, 4, 0, 2816, 805306368, 0, 2816, 805306368, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756381758168749087_431_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756381758168749087_431_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..592627c4 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756381758168749087_431_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,94 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - 
Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756381780781614541_433_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756381780781614541_433_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ec428e48 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756381780781614541_433_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,186 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 21))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24))) { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5952, 2097152, 0, 8128, 85, 0, 8128, 85, 0, 8128, 85, 0, 8128, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5952, 2097152, 0, 8128, 85, 0, 8128, 85, 0, 8128, 85, 0, 8128, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756381898521169041_435_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756381898521169041_435_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7bf230d5 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756381898521169041_435_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,160 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 27))) { + if 
(((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 85, 0, 768, 85, 0, 
768, 85, 0, 768, 85, 0, 5248, 8389120, 0, 5248, 8389120, 0, 4864, 134217728, 0, 768, 85, 0, 768, 85, 0, 768, 85, 0, 768, 85, 0, 5248, 8389120, 0, 5248, 8389120, 0, 4864, 134217728, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756382007285407119_442_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756382007285407119_442_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..03bb987a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756382007285407119_442_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,600 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 28)) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 24)) { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 28))) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 25))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 28))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 25)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 31)) { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((287 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (292 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 7))) { + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (333 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (350 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (361 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((387 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((402 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (425 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (429 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 26))) { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = 
(counter4 + 1); + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((453 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((479 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((486 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (498 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((516 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((525 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (532 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (541 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (550 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (564 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((582 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((596 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (605 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 456 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 136348168, 0, 1024, 136348168, 0, 1024, 136348168, 0, 1024, 136348168, 0, 1024, 136348168, 0, 2688, 520, 0, 2688, 520, 0, 2704, 520, 0, 2704, 520, 0, 2720, 520, 0, 2720, 520, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 7104, 1073741825, 0, 7104, 1073741825, 
0, 10496, 1073741825, 0, 10496, 1073741825, 0, 11648, 2415919122, 0, 11648, 2415919122, 0, 11648, 2415919122, 0, 11648, 2415919122, 0, 12288, 16, 0, 12928, 33554432, 0, 14144, 128, 0, 15104, 2449473682, 0, 15104, 2449473682, 0, 15104, 2449473682, 0, 15104, 2449473682, 0, 15104, 2449473682, 0, 15104, 2449473682, 0, 15552, 4718592, 0, 15552, 4718592, 0, 18384, 4194304, 0, 18400, 4194304, 0, 18416, 4194304, 0, 34624, 17043456, 0, 34624, 17043456, 0, 34624, 17043456, 0, 35200, 272696336, 0, 35200, 272696336, 0, 35200, 272696336, 0, 35200, 272696336, 0, 35200, 272696336, 0, 37264, 536870912, 0, 37280, 536870912, 0, 37296, 536870912, 0, 38164, 32, 0, 38168, 32, 0, 38180, 32, 0, 38184, 32, 0, 38196, 32, 0, 38200, 32, 0, 1024, 136348168, 0, 1024, 136348168, 0, 1024, 136348168, 0, 1024, 136348168, 0, 1024, 136348168, 0, 2688, 520, 0, 2688, 520, 0, 2704, 520, 0, 2704, 520, 0, 2720, 520, 0, 2720, 520, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 4032, 1363481681, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 7104, 1073741825, 0, 7104, 1073741825, 0, 10496, 1073741825, 0, 10496, 1073741825, 0, 11648, 2415919122, 0, 11648, 2415919122, 0, 11648, 2415919122, 0, 11648, 2415919122, 0, 12288, 16, 0, 12928, 33554432, 0, 14144, 128, 0, 15104, 2449473682, 0, 15104, 2449473682, 0, 15104, 2449473682, 0, 15104, 2449473682, 0, 15104, 2449473682, 0, 15104, 2449473682, 0, 15552, 4718592, 0, 15552, 4718592, 0, 18384, 4194304, 0, 18400, 4194304, 0, 18416, 4194304, 0, 34624, 17043456, 0, 34624, 17043456, 0, 34624, 17043456, 0, 35200, 272696336, 0, 35200, 272696336, 0, 35200, 
272696336, 0, 35200, 272696336, 0, 35200, 272696336, 0, 37264, 536870912, 0, 37280, 536870912, 0, 37296, 536870912, 0, 38164, 32, 0, 38168, 32, 0, 38180, 32, 0, 38184, 32, 0, 38196, 32, 0, 38200, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756382138877294432_443_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756382138877294432_443_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..56c44ec0 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756382138877294432_443_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,152 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 9) 
|| (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28))) { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1092 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 3221225983, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2272, 3221225983, 0, 2272, 3221225983, 0, 2272, 
3221225983, 0, 2272, 3221225983, 0, 2272, 3221225983, 0, 2272, 3221225983, 0, 2272, 3221225983, 0, 2272, 3221225983, 0, 2272, 3221225983, 0, 2272, 3221225983, 0, 2272, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 5584, 3221225503, 0, 5584, 3221225503, 0, 5584, 3221225503, 0, 5584, 3221225503, 0, 5584, 3221225503, 0, 5584, 3221225503, 0, 5584, 3221225503, 0, 5600, 3221225503, 0, 5600, 3221225503, 0, 5600, 3221225503, 0, 5600, 3221225503, 0, 5600, 3221225503, 0, 5600, 3221225503, 0, 5600, 3221225503, 0, 5616, 3221225503, 0, 5616, 3221225503, 0, 5616, 3221225503, 0, 5616, 3221225503, 0, 5616, 3221225503, 0, 5616, 3221225503, 0, 5616, 3221225503, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 8256, 3221225727, 0, 8256, 3221225727, 0, 8256, 3221225727, 0, 8256, 3221225727, 0, 8256, 3221225727, 0, 8256, 3221225727, 0, 8256, 3221225727, 0, 8256, 3221225727, 0, 8256, 3221225727, 0, 8256, 
3221225727, 0, 8272, 3221225727, 0, 8272, 3221225727, 0, 8272, 3221225727, 0, 8272, 3221225727, 0, 8272, 3221225727, 0, 8272, 3221225727, 0, 8272, 3221225727, 0, 8272, 3221225727, 0, 8272, 3221225727, 0, 8272, 3221225727, 0, 8288, 3221225727, 0, 8288, 3221225727, 0, 8288, 3221225727, 0, 8288, 3221225727, 0, 8288, 3221225727, 0, 8288, 3221225727, 0, 8288, 3221225727, 0, 8288, 3221225727, 0, 8288, 3221225727, 0, 8288, 3221225727, 0, 8960, 3758096447, 0, 8960, 3758096447, 0, 8960, 3758096447, 0, 8960, 3758096447, 0, 8960, 3758096447, 0, 8960, 3758096447, 0, 8960, 3758096447, 0, 8960, 3758096447, 0, 8960, 3758096447, 0, 8976, 3758096447, 0, 8976, 3758096447, 0, 8976, 3758096447, 0, 8976, 3758096447, 0, 8976, 3758096447, 0, 8976, 3758096447, 0, 8976, 3758096447, 0, 8976, 3758096447, 0, 8976, 3758096447, 0, 8992, 3758096447, 0, 8992, 3758096447, 0, 8992, 3758096447, 0, 8992, 3758096447, 0, 8992, 3758096447, 0, 8992, 3758096447, 0, 8992, 3758096447, 0, 8992, 3758096447, 0, 8992, 3758096447, 0, 9408, 4160749568, 0, 9408, 4160749568, 0, 9408, 4160749568, 0, 9408, 4160749568, 0, 9408, 4160749568, 0, 9424, 4160749568, 0, 9424, 4160749568, 0, 9424, 4160749568, 0, 9424, 4160749568, 0, 9424, 4160749568, 0, 9440, 4160749568, 0, 9440, 4160749568, 0, 9440, 4160749568, 0, 9440, 4160749568, 0, 9440, 4160749568, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 1088, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2256, 3221225983, 0, 2272, 3221225983, 0, 2272, 3221225983, 0, 2272, 3221225983, 0, 2272, 3221225983, 0, 2272, 3221225983, 0, 2272, 3221225983, 0, 2272, 3221225983, 0, 2272, 3221225983, 0, 2272, 3221225983, 0, 2272, 3221225983, 0, 2272, 
3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 2288, 3221225983, 0, 5584, 3221225503, 0, 5584, 3221225503, 0, 5584, 3221225503, 0, 5584, 3221225503, 0, 5584, 3221225503, 0, 5584, 3221225503, 0, 5584, 3221225503, 0, 5600, 3221225503, 0, 5600, 3221225503, 0, 5600, 3221225503, 0, 5600, 3221225503, 0, 5600, 3221225503, 0, 5600, 3221225503, 0, 5600, 3221225503, 0, 5616, 3221225503, 0, 5616, 3221225503, 0, 5616, 3221225503, 0, 5616, 3221225503, 0, 5616, 3221225503, 0, 5616, 3221225503, 0, 5616, 3221225503, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6656, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6672, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 6688, 4294836224, 0, 8256, 3221225727, 0, 8256, 3221225727, 0, 8256, 3221225727, 0, 8256, 3221225727, 0, 8256, 3221225727, 0, 8256, 3221225727, 0, 8256, 3221225727, 0, 8256, 3221225727, 0, 8256, 3221225727, 0, 8256, 3221225727, 0, 8272, 3221225727, 0, 8272, 3221225727, 0, 8272, 3221225727, 0, 8272, 3221225727, 0, 8272, 3221225727, 0, 8272, 3221225727, 0, 8272, 3221225727, 0, 8272, 
3221225727, 0, 8272, 3221225727, 0, 8272, 3221225727, 0, 8288, 3221225727, 0, 8288, 3221225727, 0, 8288, 3221225727, 0, 8288, 3221225727, 0, 8288, 3221225727, 0, 8288, 3221225727, 0, 8288, 3221225727, 0, 8288, 3221225727, 0, 8288, 3221225727, 0, 8288, 3221225727, 0, 8960, 3758096447, 0, 8960, 3758096447, 0, 8960, 3758096447, 0, 8960, 3758096447, 0, 8960, 3758096447, 0, 8960, 3758096447, 0, 8960, 3758096447, 0, 8960, 3758096447, 0, 8960, 3758096447, 0, 8976, 3758096447, 0, 8976, 3758096447, 0, 8976, 3758096447, 0, 8976, 3758096447, 0, 8976, 3758096447, 0, 8976, 3758096447, 0, 8976, 3758096447, 0, 8976, 3758096447, 0, 8976, 3758096447, 0, 8992, 3758096447, 0, 8992, 3758096447, 0, 8992, 3758096447, 0, 8992, 3758096447, 0, 8992, 3758096447, 0, 8992, 3758096447, 0, 8992, 3758096447, 0, 8992, 3758096447, 0, 8992, 3758096447, 0, 9408, 4160749568, 0, 9408, 4160749568, 0, 9408, 4160749568, 0, 9408, 4160749568, 0, 9408, 4160749568, 0, 9424, 4160749568, 0, 9424, 4160749568, 0, 9424, 4160749568, 0, 9424, 4160749568, 0, 9424, 4160749568, 0, 9440, 4160749568, 0, 9440, 4160749568, 0, 9440, 4160749568, 0, 9440, 4160749568, 0, 9440, 4160749568, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756382155275532718_444_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756382155275532718_444_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..69b6fe89 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756382155275532718_444_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,452 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + 
} + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + 
WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 31))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((180 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((187 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((257 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((275 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((286 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((301 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (327 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (338 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 22))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((361 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (366 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 6))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (400 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 30))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (422 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (437 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
(((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (456 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((474 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (489 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (494 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (501 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: 
[1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 522 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 73, 0, 768, 73, 0, 768, 73, 0, 6976, 613566756, 0, 6976, 613566756, 0, 6976, 613566756, 0, 6976, 613566756, 0, 6976, 613566756, 0, 6976, 613566756, 0, 6976, 613566756, 0, 6976, 613566756, 0, 6976, 613566756, 0, 6976, 613566756, 0, 8960, 33622048, 0, 8960, 33622048, 0, 8960, 33622048, 0, 8960, 33622048, 0, 8976, 33622048, 0, 8976, 33622048, 0, 8976, 33622048, 0, 8976, 33622048, 0, 8992, 33622048, 0, 8992, 33622048, 0, 8992, 33622048, 0, 8992, 33622048, 0, 10624, 64, 0, 10640, 64, 0, 10656, 64, 0, 11972, 2147483648, 0, 11976, 2147483648, 0, 11980, 2147483648, 0, 11988, 2147483648, 0, 11992, 2147483648, 0, 11996, 2147483648, 0, 12004, 2147483648, 0, 12008, 2147483648, 0, 12012, 2147483648, 0, 14144, 268435968, 0, 14144, 268435968, 0, 14160, 268435968, 0, 14160, 268435968, 0, 14176, 268435968, 0, 14176, 268435968, 0, 14976, 17, 0, 14976, 17, 0, 16448, 570425344, 0, 16448, 570425344, 0, 16452, 570425344, 0, 16452, 570425344, 0, 16456, 570425344, 0, 16456, 570425344, 0, 16464, 570425344, 0, 16464, 570425344, 0, 16468, 570425344, 0, 16468, 570425344, 0, 16472, 570425344, 0, 16472, 570425344, 0, 16480, 570425344, 0, 16480, 570425344, 0, 16484, 570425344, 0, 16484, 570425344, 0, 16488, 570425344, 0, 16488, 570425344, 0, 19264, 2, 0, 19268, 2, 0, 19272, 2, 0, 19280, 2, 0, 19284, 2, 0, 19288, 2, 0, 19296, 2, 0, 19300, 2, 0, 19304, 2, 0, 21632, 1073741824, 0, 30352, 4195328, 0, 30352, 4195328, 0, 30368, 4195328, 0, 30368, 4195328, 0, 30384, 4195328, 0, 30384, 4195328, 0, 31616, 67125252, 0, 31616, 67125252, 0, 31616, 67125252, 0, 32064, 559240, 0, 32064, 559240, 0, 32064, 559240, 0, 32064, 559240, 0, 32064, 559240, 0, 768, 73, 0, 768, 73, 0, 768, 73, 0, 6976, 613566756, 0, 6976, 613566756, 0, 6976, 613566756, 0, 6976, 613566756, 0, 6976, 613566756, 0, 6976, 613566756, 0, 
6976, 613566756, 0, 6976, 613566756, 0, 6976, 613566756, 0, 6976, 613566756, 0, 8960, 33622048, 0, 8960, 33622048, 0, 8960, 33622048, 0, 8960, 33622048, 0, 8976, 33622048, 0, 8976, 33622048, 0, 8976, 33622048, 0, 8976, 33622048, 0, 8992, 33622048, 0, 8992, 33622048, 0, 8992, 33622048, 0, 8992, 33622048, 0, 10624, 64, 0, 10640, 64, 0, 10656, 64, 0, 11972, 2147483648, 0, 11976, 2147483648, 0, 11980, 2147483648, 0, 11988, 2147483648, 0, 11992, 2147483648, 0, 11996, 2147483648, 0, 12004, 2147483648, 0, 12008, 2147483648, 0, 12012, 2147483648, 0, 14144, 268435968, 0, 14144, 268435968, 0, 14160, 268435968, 0, 14160, 268435968, 0, 14176, 268435968, 0, 14176, 268435968, 0, 14976, 17, 0, 14976, 17, 0, 16448, 570425344, 0, 16448, 570425344, 0, 16452, 570425344, 0, 16452, 570425344, 0, 16456, 570425344, 0, 16456, 570425344, 0, 16464, 570425344, 0, 16464, 570425344, 0, 16468, 570425344, 0, 16468, 570425344, 0, 16472, 570425344, 0, 16472, 570425344, 0, 16480, 570425344, 0, 16480, 570425344, 0, 16484, 570425344, 0, 16484, 570425344, 0, 16488, 570425344, 0, 16488, 570425344, 0, 19264, 2, 0, 19268, 2, 0, 19272, 2, 0, 19280, 2, 0, 19284, 2, 0, 19288, 2, 0, 19296, 2, 0, 19300, 2, 0, 19304, 2, 0, 21632, 1073741824, 0, 30352, 4195328, 0, 30352, 4195328, 0, 30368, 4195328, 0, 30368, 4195328, 0, 30384, 4195328, 0, 30384, 4195328, 0, 31616, 67125252, 0, 31616, 67125252, 0, 31616, 67125252, 0, 32064, 559240, 0, 32064, 559240, 0, 32064, 559240, 0, 32064, 559240, 0, 32064, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756382249440599441_445_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756382249440599441_445_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ad303d8b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756382249440599441_445_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,245 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((34 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
break; + } + } + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 13)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 26))) { + 
result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 4, 0, 1056, 4, 0, 1072, 4, 0, 2196, 4, 0, 2200, 4, 0, 2204, 4, 0, 2212, 4, 0, 2216, 4, 0, 2220, 4, 0, 2228, 4, 0, 2232, 4, 0, 2236, 4, 0, 3264, 10, 0, 3264, 10, 0, 5248, 559240, 0, 5248, 559240, 0, 5248, 559240, 0, 5248, 559240, 0, 5248, 559240, 0, 6080, 2863136768, 0, 6080, 2863136768, 0, 6080, 2863136768, 0, 6080, 2863136768, 0, 6080, 2863136768, 0, 6080, 2863136768, 0, 6080, 2863136768, 0, 7808, 8192, 0, 7824, 8192, 0, 7840, 8192, 0, 11712, 67239937, 0, 11712, 67239937, 0, 11712, 67239937, 0, 11328, 134217856, 0, 11328, 134217856, 0, 11072, 1536, 0, 11072, 1536, 0, 10816, 537399296, 0, 10816, 537399296, 0, 10816, 537399296, 0, 1040, 4, 0, 1056, 4, 0, 1072, 4, 0, 2196, 4, 0, 2200, 4, 0, 2204, 4, 0, 2212, 4, 0, 2216, 4, 0, 2220, 4, 0, 2228, 4, 0, 2232, 4, 0, 2236, 4, 0, 3264, 10, 0, 3264, 10, 0, 5248, 559240, 0, 5248, 559240, 0, 5248, 559240, 0, 5248, 559240, 0, 5248, 559240, 0, 6080, 2863136768, 0, 6080, 2863136768, 0, 6080, 2863136768, 0, 6080, 2863136768, 0, 6080, 2863136768, 0, 6080, 2863136768, 0, 6080, 2863136768, 0, 7808, 
8192, 0, 7824, 8192, 0, 7840, 8192, 0, 11712, 67239937, 0, 11712, 67239937, 0, 11712, 67239937, 0, 11328, 134217856, 0, 11328, 134217856, 0, 11072, 1536, 0, 11072, 1536, 0, 10816, 537399296, 0, 10816, 537399296, 0, 10816, 537399296, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756382592466532121_447_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756382592466532121_447_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..764884fc --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756382592466532121_447_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,296 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 
22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((90 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 19))) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 18))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() >= 27)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1792, 4194304, 0, 1808, 4194304, 0, 6980, 2147483648, 0, 6984, 2147483648, 0, 6996, 2147483648, 0, 7000, 2147483648, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1792, 4194304, 0, 1808, 4194304, 0, 6980, 2147483648, 0, 6984, 
2147483648, 0, 6996, 2147483648, 0, 7000, 2147483648, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756383049060469137_451_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756383049060469137_451_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3589d3d7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756383049060469137_451_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,92 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756383049324322792_452_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756383049324322792_452_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..064fc0db --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756383049324322792_452_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,183 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 26)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 20))) { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1984, 268435457, 0, 1984, 268435457, 0, 2000, 268435457, 0, 2000, 268435457, 0, 2432, 286261248, 0, 2432, 286261248, 0, 2432, 286261248, 0, 2448, 286261248, 0, 2448, 286261248, 0, 2448, 286261248, 0, 6784, 1145324612, 0, 6784, 1145324612, 0, 6784, 1145324612, 0, 6784, 1145324612, 0, 6784, 1145324612, 0, 6784, 1145324612, 0, 6784, 1145324612, 0, 6784, 1145324612, 0, 7424, 8, 0, 8960, 8388608, 0, 8976, 8388608, 0, 8992, 8388608, 0, 1984, 268435457, 0, 1984, 268435457, 0, 2000, 268435457, 0, 2000, 268435457, 0, 2432, 286261248, 0, 2432, 286261248, 0, 2432, 286261248, 0, 2448, 286261248, 0, 2448, 286261248, 0, 2448, 286261248, 0, 6784, 1145324612, 0, 6784, 1145324612, 0, 6784, 1145324612, 0, 6784, 1145324612, 0, 6784, 1145324612, 0, 6784, 1145324612, 0, 6784, 1145324612, 0, 6784, 1145324612, 0, 7424, 8, 0, 8960, 8388608, 0, 8976, 8388608, 0, 8992, 8388608, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756383062656310108_454_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756383062656310108_454_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f41f2125 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756383062656310108_454_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,130 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (i0 << 4)) | (i1 << 
2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2560, 16777216, 0, 2564, 16777216, 0, 2568, 16777216, 0, 2576, 16777216, 0, 2580, 16777216, 0, 2584, 16777216, 0, 3712, 1, 0, 3728, 1, 0, 4288, 272696336, 0, 4288, 272696336, 0, 4288, 272696336, 0, 
4288, 272696336, 0, 4288, 272696336, 0, 4608, 613566756, 0, 4608, 613566756, 0, 4608, 613566756, 0, 4608, 613566756, 0, 4608, 613566756, 0, 4608, 613566756, 0, 4608, 613566756, 0, 4608, 613566756, 0, 4608, 613566756, 0, 4608, 613566756, 0, 2560, 16777216, 0, 2564, 16777216, 0, 2568, 16777216, 0, 2576, 16777216, 0, 2580, 16777216, 0, 2584, 16777216, 0, 3712, 1, 0, 3728, 1, 0, 4288, 272696336, 0, 4288, 272696336, 0, 4288, 272696336, 0, 4288, 272696336, 0, 4288, 272696336, 0, 4608, 613566756, 0, 4608, 613566756, 0, 4608, 613566756, 0, 4608, 613566756, 0, 4608, 613566756, 0, 4608, 613566756, 0, 4608, 613566756, 0, 4608, 613566756, 0, 4608, 613566756, 0, 4608, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756383064960016839_455_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756383064960016839_455_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8c3922c3 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756383064960016839_455_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2896, 33562632, 0, 2896, 33562632, 0, 2896, 33562632, 0, 3924, 33562632, 0, 3924, 33562632, 0, 3924, 33562632, 0, 3928, 33562632, 0, 3928, 33562632, 0, 3928, 33562632, 0, 4624, 33562632, 0, 4624, 33562632, 0, 4624, 33562632, 0, 5888, 8192, 0, 2896, 33562632, 0, 2896, 33562632, 0, 2896, 33562632, 0, 3924, 33562632, 0, 3924, 33562632, 0, 3924, 33562632, 0, 3928, 33562632, 0, 3928, 33562632, 0, 3928, 33562632, 0, 4624, 33562632, 0, 4624, 33562632, 0, 4624, 33562632, 0, 5888, 8192, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756383529042175754_461_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756383529042175754_461_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ea24dce4 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756383529042175754_461_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,124 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 31))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756383544133545025_463_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756383544133545025_463_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e909bd62 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756383544133545025_463_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,146 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 17)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} 
+ +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 174 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2000, 134283520, 0, 2000, 134283520, 0, 2000, 134283520, 0, 2016, 134283520, 0, 2016, 134283520, 0, 2016, 134283520, 0, 2032, 134283520, 0, 2032, 134283520, 0, 2032, 134283520, 0, 4816, 196672, 0, 4816, 196672, 0, 4816, 196672, 0, 4832, 196672, 0, 4832, 196672, 0, 4832, 196672, 0, 4848, 196672, 0, 4848, 196672, 0, 4848, 196672, 0, 7744, 16908290, 0, 7744, 16908290, 0, 7744, 16908290, 0, 7360, 541704, 0, 7360, 541704, 0, 7360, 541704, 0, 7360, 541704, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2000, 134283520, 0, 2000, 134283520, 0, 2000, 134283520, 0, 2016, 134283520, 0, 2016, 134283520, 0, 2016, 134283520, 0, 2032, 134283520, 0, 2032, 134283520, 0, 2032, 134283520, 0, 4816, 196672, 0, 4816, 196672, 0, 4816, 196672, 0, 4832, 196672, 0, 4832, 196672, 0, 4832, 196672, 0, 4848, 196672, 0, 4848, 196672, 0, 4848, 196672, 0, 7744, 16908290, 0, 7744, 16908290, 0, 7744, 16908290, 0, 7360, 541704, 0, 7360, 541704, 0, 7360, 541704, 0, 7360, 541704, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756383798590106695_465_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756383798590106695_465_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5c1eb6f8 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756383798590106695_465_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,210 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if 
((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7744, 1, 0, 8320, 1, 0, 8640, 4, 0, 7744, 1, 0, 8320, 1, 0, 8640, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: 
+ - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756383800550147408_466_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756383800550147408_466_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d256aa4b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756383800550147408_466_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,205 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 24)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 30))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 
6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1408, 17, 0, 1408, 17, 0, 1984, 269553937, 0, 1984, 269553937, 0, 1984, 269553937, 0, 1984, 269553937, 0, 1984, 269553937, 0, 1984, 269553937, 0, 1984, 269553937, 0, 2304, 269553937, 0, 2304, 269553937, 0, 2304, 269553937, 0, 2304, 269553937, 0, 2304, 269553937, 0, 2304, 269553937, 0, 2304, 269553937, 0, 2752, 69905, 0, 2752, 69905, 0, 2752, 69905, 0, 2752, 69905, 0, 2752, 69905, 0, 5392, 32, 0, 7056, 32, 0, 8016, 8192, 0, 9232, 8224, 0, 9232, 8224, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10752, 559240, 0, 10752, 559240, 0, 10752, 559240, 0, 10752, 559240, 0, 10752, 559240, 0, 1408, 17, 0, 1408, 17, 0, 1984, 269553937, 0, 1984, 269553937, 0, 1984, 269553937, 0, 1984, 269553937, 0, 1984, 269553937, 0, 1984, 269553937, 0, 1984, 269553937, 0, 2304, 269553937, 0, 2304, 269553937, 0, 2304, 269553937, 0, 2304, 269553937, 0, 2304, 269553937, 0, 2304, 269553937, 0, 2304, 269553937, 0, 2752, 69905, 0, 2752, 69905, 0, 2752, 69905, 0, 2752, 69905, 0, 2752, 69905, 0, 5392, 32, 0, 7056, 32, 0, 8016, 8192, 0, 9232, 8224, 0, 9232, 8224, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10752, 559240, 0, 10752, 559240, 0, 10752, 559240, 0, 10752, 559240, 0, 10752, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + 
Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756383828297232166_468_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756383828297232166_468_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6b56b7f1 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756383828297232166_468_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,535 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 17)) { + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + 
result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((111 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 20))) { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 22)) { + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((185 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 24))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (335 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (345 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (355 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (362 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (369 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((392 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || 
(WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((407 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (414 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (431 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((454 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((475 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (484 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (495 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (506 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((520 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((536 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((545 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((554 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (558 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 384 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2432, 16777216, 0, 3008, 268435456, 0, 7104, 570425344, 0, 7104, 570425344, 0, 7120, 570425344, 0, 7120, 570425344, 0, 7136, 570425344, 0, 7136, 570425344, 0, 8512, 131072, 0, 8528, 131072, 0, 8544, 131072, 0, 10496, 33554432, 0, 10512, 33554432, 0, 10528, 33554432, 0, 12416, 572522496, 0, 12416, 572522496, 0, 12416, 572522496, 0, 12432, 572522496, 0, 12432, 572522496, 0, 12432, 572522496, 0, 12448, 572522496, 0, 12448, 572522496, 0, 12448, 572522496, 0, 21440, 4, 0, 22080, 64, 0, 23616, 1024, 0, 26496, 559240, 0, 26496, 559240, 0, 26496, 559240, 0, 26496, 559240, 0, 26496, 559240, 0, 31680, 1342177365, 0, 31680, 1342177365, 0, 31680, 1342177365, 0, 31680, 1342177365, 0, 31680, 1342177365, 0, 31680, 1342177365, 0, 32384, 1426063701, 0, 32384, 1426063701, 0, 32384, 1426063701, 0, 32384, 1426063701, 0, 
32384, 1426063701, 0, 32384, 1426063701, 0, 32384, 1426063701, 0, 32384, 1426063701, 0, 32384, 1426063701, 0, 33296, 2684354560, 0, 33296, 2684354560, 0, 33312, 2684354560, 0, 33312, 2684354560, 0, 33328, 2684354560, 0, 33328, 2684354560, 0, 35472, 2852126720, 0, 35472, 2852126720, 0, 35472, 2852126720, 0, 35472, 2852126720, 0, 35488, 2852126720, 0, 35488, 2852126720, 0, 35488, 2852126720, 0, 35488, 2852126720, 0, 35504, 2852126720, 0, 35504, 2852126720, 0, 35504, 2852126720, 0, 35504, 2852126720, 0, 2432, 16777216, 0, 3008, 268435456, 0, 7104, 570425344, 0, 7104, 570425344, 0, 7120, 570425344, 0, 7120, 570425344, 0, 7136, 570425344, 0, 7136, 570425344, 0, 8512, 131072, 0, 8528, 131072, 0, 8544, 131072, 0, 10496, 33554432, 0, 10512, 33554432, 0, 10528, 33554432, 0, 12416, 572522496, 0, 12416, 572522496, 0, 12416, 572522496, 0, 12432, 572522496, 0, 12432, 572522496, 0, 12432, 572522496, 0, 12448, 572522496, 0, 12448, 572522496, 0, 12448, 572522496, 0, 21440, 4, 0, 22080, 64, 0, 23616, 1024, 0, 26496, 559240, 0, 26496, 559240, 0, 26496, 559240, 0, 26496, 559240, 0, 26496, 559240, 0, 31680, 1342177365, 0, 31680, 1342177365, 0, 31680, 1342177365, 0, 31680, 1342177365, 0, 31680, 1342177365, 0, 31680, 1342177365, 0, 32384, 1426063701, 0, 32384, 1426063701, 0, 32384, 1426063701, 0, 32384, 1426063701, 0, 32384, 1426063701, 0, 32384, 1426063701, 0, 32384, 1426063701, 0, 32384, 1426063701, 0, 32384, 1426063701, 0, 33296, 2684354560, 0, 33296, 2684354560, 0, 33312, 2684354560, 0, 33312, 2684354560, 0, 33328, 2684354560, 0, 33328, 2684354560, 0, 35472, 2852126720, 0, 35472, 2852126720, 0, 35472, 2852126720, 0, 35472, 2852126720, 0, 35488, 2852126720, 0, 35488, 2852126720, 0, 35488, 2852126720, 0, 35488, 2852126720, 0, 35504, 2852126720, 0, 35504, 2852126720, 0, 35504, 2852126720, 0, 35504, 2852126720, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756383953497155109_469_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756383953497155109_469_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..361ec7b9 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756383953497155109_469_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,342 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 11)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((243 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + 
} + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((297 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (311 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (329 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 16)) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1984, 73, 0, 1984, 73, 0, 1984, 73, 0, 6784, 16, 0, 7680, 4195328, 0, 7680, 4195328, 0, 8704, 272696336, 0, 8704, 272696336, 0, 8704, 272696336, 0, 8704, 272696336, 0, 8704, 272696336, 0, 10880, 128, 0, 10896, 128, 0, 10912, 128, 0, 18304, 603979812, 0, 18304, 603979812, 0, 18304, 603979812, 0, 18304, 603979812, 0, 18320, 603979812, 0, 18320, 603979812, 0, 18320, 603979812, 0, 18320, 603979812, 0, 19008, 536870912, 0, 19024, 536870912, 0, 19904, 2048, 0, 21056, 512, 0, 20672, 65536, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1984, 73, 0, 1984, 73, 0, 1984, 73, 0, 6784, 16, 0, 7680, 4195328, 0, 7680, 4195328, 0, 8704, 272696336, 0, 8704, 272696336, 0, 8704, 272696336, 0, 8704, 272696336, 0, 8704, 272696336, 0, 10880, 128, 0, 10896, 128, 0, 10912, 128, 0, 18304, 603979812, 0, 18304, 603979812, 0, 18304, 603979812, 0, 18304, 603979812, 0, 18320, 603979812, 0, 18320, 603979812, 0, 18320, 603979812, 0, 18320, 603979812, 0, 19008, 536870912, 0, 19024, 536870912, 0, 19904, 2048, 0, 21056, 512, 0, 20672, 65536, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756383964592916173_470_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756383964592916173_470_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..698f3caa --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756383964592916173_470_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,186 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 25))) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + 
for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((73 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((88 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 1, 0, 2128, 1, 0, 3008, 1, 0, 6400, 1048832, 0, 6400, 1048832, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7744, 559240, 0, 7744, 559240, 0, 7744, 559240, 0, 7744, 559240, 0, 7744, 559240, 0, 2112, 1, 0, 2128, 1, 0, 3008, 1, 0, 6400, 1048832, 0, 6400, 1048832, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7296, 1145324612, 0, 7744, 559240, 0, 7744, 559240, 0, 7744, 559240, 0, 7744, 559240, 0, 7744, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + 
Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756384335841424413_474_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756384335841424413_474_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bd41e2ff --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756384335841424413_474_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,204 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 27)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3584, 73, 0, 3584, 73, 0, 3584, 73, 0, 4160, 272696336, 0, 4160, 272696336, 0, 4160, 272696336, 0, 4160, 272696336, 0, 4160, 272696336, 0, 4480, 613566756, 0, 4480, 613566756, 0, 4480, 613566756, 0, 4480, 613566756, 0, 4480, 613566756, 0, 4480, 613566756, 0, 4480, 613566756, 0, 4480, 613566756, 0, 4480, 613566756, 0, 4480, 613566756, 0, 5120, 17, 0, 5120, 17, 0, 6016, 1145324612, 0, 6016, 1145324612, 0, 6016, 1145324612, 0, 6016, 1145324612, 0, 6016, 1145324612, 0, 6016, 1145324612, 0, 6016, 1145324612, 0, 6016, 1145324612, 0, 
576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3584, 73, 0, 3584, 73, 0, 3584, 73, 0, 4160, 272696336, 0, 4160, 272696336, 0, 4160, 272696336, 0, 4160, 272696336, 0, 4160, 272696336, 0, 4480, 613566756, 0, 4480, 613566756, 0, 4480, 613566756, 0, 4480, 613566756, 0, 4480, 613566756, 0, 4480, 613566756, 0, 4480, 613566756, 0, 4480, 613566756, 0, 4480, 613566756, 0, 4480, 613566756, 0, 5120, 17, 0, 5120, 17, 0, 6016, 1145324612, 0, 6016, 1145324612, 0, 6016, 1145324612, 0, 6016, 1145324612, 0, 6016, 1145324612, 0, 6016, 1145324612, 0, 6016, 1145324612, 0, 6016, 1145324612, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize32BitTracking/tests/program_1756384337447986700_475_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756384337447986700_475_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..8545a1c9
--- /dev/null
+++ b/test/WaveSize32BitTracking/tests/program_1756384337447986700_475_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,114 @@
+#--- source.hlsl
+// Participation log: every lane that reaches a tracked wave op appends one
+// 3-word record {op id << 6, ballot.x, ballot.y}, where ballot is the
+// WaveActiveBallot(1) taken in the same branch, right after the op.
+RWStructuredBuffer<uint> _participant_bit : register(u0);
+// _wave_op_index[0] is the shared write cursor into _participant_bit.
+RWStructuredBuffer<uint> _wave_op_index : register(u1);
+
+[numthreads(64, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) {
+  uint result = 0;
+  // Only lanes 4, 8, 19 and 28 of a wave enter the tracked region.
+  if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 8))) {
+    // None of these lanes passed the outer test, so op 37 never logs a record.
+    if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 11))) {
+      result = (result + WaveActiveSum(7));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (37 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      // Even lanes 4, 8 and 28 arrive here; only lane 4 is below 8 -> record (47 << 6 = 3008, 1 << 4 = 16, 0).
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (47 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: {
+      // Only odd lane 19 arrives here, so the even-lane test fails and op 56 never logs a record.
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (56 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    }
+    // Of the lanes inside the outer branch only lane 28 matches -> record (77 << 6 = 4928, 1 << 28 = 268435456, 0).
+    if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 28))) {
+      result = (result + WaveActiveMin((WaveGetLaneIndex() + 3)));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (77 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 64 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 12
+  # The two reachable records (ops 47 and 77) are listed twice: presumably one copy per
+  # 32-lane wave of the 64-thread group - TODO confirm behaviour on other wave sizes.
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [3008, 16, 0, 4928, 268435456, 0, 3008, 16, 0, 4928, 268435456, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756384340978039965_477_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756384340978039965_477_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b3abcc89 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756384340978039965_477_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,480 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 24))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 27)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 23))) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 25))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((191 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() >= 27)) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 25))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 21))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((269 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((294 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((305 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((321 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((330 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 30)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (340 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (362 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (366 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (377 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((398 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((405 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (414 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (419 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (428 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (432 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 3968, 2097664, 0, 3968, 2097664, 0, 3984, 2097664, 0, 3984, 2097664, 0, 6912, 537002016, 0, 6912, 537002016, 0, 6912, 537002016, 0, 8656, 64, 0, 8672, 64, 0, 8688, 64, 0, 11664, 64, 0, 11680, 64, 0, 11696, 64, 0, 12240, 64, 0, 12256, 64, 0, 12272, 64, 0, 18816, 8, 0, 18832, 8, 0, 18848, 8, 0, 19520, 8, 0, 19536, 8, 0, 19552, 8, 0, 21136, 524416, 0, 21136, 524416, 0, 21152, 524416, 0, 21152, 524416, 0, 21168, 524416, 0, 21168, 524416, 0, 24128, 8, 0, 26496, 272696336, 0, 26496, 272696336, 0, 26496, 272696336, 0, 26496, 272696336, 0, 26496, 272696336, 0, 26816, 68174084, 0, 26816, 68174084, 0, 26816, 68174084, 0, 26816, 68174084, 0, 26816, 68174084, 0, 576, 17, 0, 576, 17, 0, 3968, 2097664, 0, 3968, 2097664, 0, 3984, 2097664, 0, 3984, 2097664, 0, 6912, 537002016, 0, 6912, 537002016, 0, 6912, 537002016, 0, 8656, 64, 0, 8672, 64, 0, 8688, 64, 0, 11664, 64, 0, 11680, 64, 0, 11696, 64, 0, 12240, 64, 0, 12256, 64, 0, 12272, 64, 0, 18816, 8, 0, 18832, 8, 0, 18848, 8, 0, 19520, 8, 0, 19536, 8, 0, 19552, 8, 0, 21136, 524416, 0, 21136, 524416, 0, 21152, 524416, 0, 21152, 524416, 0, 21168, 524416, 0, 21168, 524416, 0, 24128, 8, 0, 26496, 272696336, 0, 26496, 272696336, 0, 26496, 272696336, 0, 26496, 272696336, 0, 26496, 272696336, 0, 26816, 68174084, 0, 26816, 68174084, 0, 26816, 68174084, 0, 26816, 68174084, 0, 26816, 68174084, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 
3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756384361996605550_478_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756384361996605550_478_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..67c37a06 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756384361996605550_478_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,425 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 24)) { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 
22)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 3)) { + 
result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 22))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((215 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 16)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 
29))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((258 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27))) { + if ((WaveGetLaneIndex() >= 17)) { + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) 
== 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (330 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (341 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 30))) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((389 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((415 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((424 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((433 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (444 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (453 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 726 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3792, 2097152, 0, 3808, 2097152, 0, 3824, 2097152, 0, 5264, 294912, 0, 5264, 294912, 0, 5280, 294912, 0, 5280, 294912, 0, 5296, 294912, 0, 5296, 294912, 0, 6528, 272696336, 0, 6528, 272696336, 0, 6528, 272696336, 0, 6528, 272696336, 0, 6528, 272696336, 0, 11136, 18724, 0, 11136, 18724, 0, 11136, 18724, 0, 11136, 18724, 0, 11136, 18724, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 
1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 
13796, 1431655765, 0, 13796, 1431655765, 0, 17344, 8, 0, 20224, 8388608, 0, 27152, 1073741888, 0, 27152, 1073741888, 0, 27168, 1073741888, 0, 27168, 1073741888, 0, 3792, 2097152, 0, 3808, 2097152, 0, 3824, 2097152, 0, 5264, 294912, 0, 5264, 294912, 0, 5280, 294912, 0, 5280, 294912, 0, 5296, 294912, 0, 5296, 294912, 0, 6528, 272696336, 0, 6528, 272696336, 0, 6528, 272696336, 0, 6528, 272696336, 0, 6528, 272696336, 0, 11136, 18724, 0, 11136, 18724, 0, 11136, 18724, 0, 11136, 18724, 0, 11136, 18724, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12688, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 12704, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13776, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13780, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 
0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13792, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 13796, 1431655765, 0, 17344, 8, 0, 20224, 8388608, 0, 27152, 1073741888, 0, 27152, 1073741888, 0, 27168, 1073741888, 0, 27168, 1073741888, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756384439429236848_480_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756384439429236848_480_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b5e27279 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756384439429236848_480_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,152 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 27))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 31))) { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 
+ Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 5888, 613566756, 0, 5888, 613566756, 0, 5888, 613566756, 0, 5888, 613566756, 0, 5888, 613566756, 0, 5888, 613566756, 0, 5888, 613566756, 0, 5888, 613566756, 0, 5888, 613566756, 0, 5888, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 5888, 613566756, 0, 5888, 613566756, 0, 5888, 613566756, 0, 5888, 613566756, 0, 5888, 613566756, 0, 5888, 613566756, 0, 5888, 613566756, 0, 5888, 613566756, 0, 5888, 613566756, 0, 5888, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756384439703765521_481_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756384439703765521_481_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..eed71747 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756384439703765521_481_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,308 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 24))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || 
(WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 18))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 23))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 22))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((213 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((222 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 13)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((288 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (308 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 762 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2560, 134217728, 0, 2576, 134217728, 0, 2592, 134217728, 0, 3840, 2181570690, 0, 3840, 2181570690, 0, 3840, 2181570690, 0, 3840, 2181570690, 0, 3840, 2181570690, 0, 3840, 2181570690, 0, 3856, 2181570690, 0, 3856, 2181570690, 0, 3856, 2181570690, 0, 3856, 2181570690, 0, 3856, 2181570690, 0, 3856, 2181570690, 0, 3872, 2181570690, 0, 3872, 2181570690, 0, 3872, 2181570690, 0, 3872, 2181570690, 0, 3872, 2181570690, 0, 3872, 2181570690, 0, 4992, 2147483650, 0, 4992, 2147483650, 0, 5008, 2147483650, 0, 5008, 2147483650, 0, 5024, 2147483650, 0, 5024, 2147483650, 0, 5696, 2147483650, 0, 5696, 2147483650, 0, 5712, 2147483650, 0, 5712, 2147483650, 0, 5728, 2147483650, 0, 5728, 2147483650, 0, 6144, 306184192, 0, 6144, 306184192, 0, 6144, 306184192, 0, 6160, 306184192, 0, 6160, 306184192, 0, 6160, 306184192, 0, 6176, 306184192, 0, 6176, 306184192, 0, 6176, 306184192, 0, 6848, 272696336, 0, 6848, 272696336, 0, 6848, 272696336, 0, 6848, 272696336, 0, 6848, 272696336, 0, 6864, 272696336, 0, 6864, 272696336, 0, 6864, 272696336, 0, 6864, 272696336, 0, 6864, 272696336, 0, 6880, 272696336, 0, 6880, 272696336, 0, 6880, 272696336, 0, 6880, 272696336, 0, 6880, 272696336, 0, 13636, 256, 0, 13640, 256, 0, 13644, 256, 0, 13652, 256, 0, 13656, 256, 0, 13660, 256, 0, 13668, 256, 0, 13672, 256, 0, 13676, 256, 0, 14212, 256, 0, 14216, 256, 0, 14220, 256, 0, 14228, 256, 0, 14232, 256, 0, 14236, 256, 0, 14244, 256, 0, 14248, 256, 0, 14252, 256, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 
1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 16128, 63, 0, 16128, 63, 0, 16128, 63, 0, 16128, 63, 0, 16128, 63, 0, 16128, 63, 0, 17280, 15, 0, 17280, 15, 0, 17280, 15, 0, 17280, 15, 0, 18448, 15, 0, 18448, 15, 0, 18448, 15, 0, 18448, 15, 0, 18464, 15, 0, 18464, 15, 0, 18464, 15, 0, 18464, 15, 0, 18480, 15, 0, 18480, 15, 0, 18480, 15, 0, 18480, 15, 0, 19136, 15, 0, 19136, 15, 0, 19136, 15, 0, 19136, 15, 0, 19712, 1023, 0, 19712, 1023, 0, 19712, 1023, 0, 19712, 1023, 0, 19712, 1023, 0, 19712, 1023, 0, 19712, 1023, 0, 19712, 1023, 0, 19712, 1023, 0, 19712, 1023, 0, 2560, 134217728, 0, 2576, 134217728, 0, 2592, 134217728, 0, 3840, 2181570690, 0, 3840, 2181570690, 0, 3840, 2181570690, 0, 3840, 2181570690, 0, 3840, 2181570690, 0, 3840, 2181570690, 0, 3856, 2181570690, 0, 3856, 2181570690, 0, 3856, 2181570690, 0, 3856, 2181570690, 0, 3856, 2181570690, 0, 3856, 2181570690, 0, 3872, 2181570690, 0, 3872, 2181570690, 0, 3872, 2181570690, 0, 3872, 2181570690, 0, 3872, 2181570690, 0, 3872, 2181570690, 0, 4992, 2147483650, 0, 4992, 2147483650, 0, 5008, 2147483650, 0, 5008, 2147483650, 0, 5024, 2147483650, 0, 5024, 2147483650, 0, 5696, 2147483650, 0, 5696, 2147483650, 0, 5712, 2147483650, 0, 5712, 2147483650, 0, 5728, 2147483650, 0, 5728, 2147483650, 0, 6144, 306184192, 0, 6144, 306184192, 0, 6144, 306184192, 0, 6160, 306184192, 0, 6160, 306184192, 0, 6160, 306184192, 0, 6176, 306184192, 0, 6176, 306184192, 0, 6176, 306184192, 0, 6848, 272696336, 0, 6848, 272696336, 0, 6848, 272696336, 0, 6848, 272696336, 0, 6848, 272696336, 0, 6864, 272696336, 0, 6864, 272696336, 0, 6864, 272696336, 0, 6864, 272696336, 0, 6864, 272696336, 0, 6880, 272696336, 0, 6880, 272696336, 0, 6880, 272696336, 0, 6880, 272696336, 0, 6880, 272696336, 0, 13636, 256, 0, 13640, 256, 0, 13644, 256, 0, 13652, 256, 0, 13656, 256, 0, 13660, 256, 0, 
13668, 256, 0, 13672, 256, 0, 13676, 256, 0, 14212, 256, 0, 14216, 256, 0, 14220, 256, 0, 14228, 256, 0, 14232, 256, 0, 14236, 256, 0, 14244, 256, 0, 14248, 256, 0, 14252, 256, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 15488, 1431655765, 0, 16128, 63, 0, 16128, 63, 0, 16128, 63, 0, 16128, 63, 0, 16128, 63, 0, 16128, 63, 0, 17280, 15, 0, 17280, 15, 0, 17280, 15, 0, 17280, 15, 0, 18448, 15, 0, 18448, 15, 0, 18448, 15, 0, 18448, 15, 0, 18464, 15, 0, 18464, 15, 0, 18464, 15, 0, 18464, 15, 0, 18480, 15, 0, 18480, 15, 0, 18480, 15, 0, 18480, 15, 0, 19136, 15, 0, 19136, 15, 0, 19136, 15, 0, 19136, 15, 0, 19712, 1023, 0, 19712, 1023, 0, 19712, 1023, 0, 19712, 1023, 0, 19712, 1023, 0, 19712, 1023, 0, 19712, 1023, 0, 19712, 1023, 0, 19712, 1023, 0, 19712, 1023, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756384458674419501_482_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756384458674419501_482_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..453a4a4d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756384458674419501_482_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,175 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 17)) { + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((WaveGetLaneIndex() >= 19)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 17))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 17)) { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 396 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 2964, 2860515328, 0, 2964, 2860515328, 0, 2964, 2860515328, 0, 2964, 2860515328, 0, 2964, 2860515328, 0, 2968, 2860515328, 0, 2968, 2860515328, 0, 2968, 2860515328, 0, 2968, 2860515328, 0, 2968, 2860515328, 0, 2980, 2860515328, 0, 2980, 2860515328, 0, 2980, 2860515328, 0, 2980, 2860515328, 0, 2980, 2860515328, 0, 2984, 2860515328, 0, 2984, 2860515328, 0, 2984, 2860515328, 0, 2984, 2860515328, 0, 2984, 2860515328, 0, 2996, 2860515328, 0, 2996, 2860515328, 0, 2996, 2860515328, 0, 2996, 
2860515328, 0, 2996, 2860515328, 0, 3000, 2860515328, 0, 3000, 2860515328, 0, 3000, 2860515328, 0, 3000, 2860515328, 0, 3000, 2860515328, 0, 7168, 349525, 0, 7168, 349525, 0, 7168, 349525, 0, 7168, 349525, 0, 7168, 349525, 0, 7168, 349525, 0, 7168, 349525, 0, 7168, 349525, 0, 7168, 349525, 0, 7168, 349525, 0, 7744, 349525, 0, 7744, 349525, 0, 7744, 349525, 0, 7744, 349525, 0, 7744, 349525, 0, 7744, 349525, 0, 7744, 349525, 0, 7744, 349525, 0, 7744, 349525, 0, 7744, 349525, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 2964, 2860515328, 0, 2964, 2860515328, 0, 2964, 2860515328, 0, 2964, 2860515328, 0, 2964, 2860515328, 0, 2968, 2860515328, 0, 2968, 2860515328, 0, 2968, 2860515328, 0, 2968, 2860515328, 0, 2968, 2860515328, 0, 2980, 2860515328, 0, 2980, 2860515328, 0, 2980, 2860515328, 0, 2980, 2860515328, 0, 2980, 2860515328, 0, 2984, 2860515328, 0, 2984, 2860515328, 0, 2984, 2860515328, 0, 2984, 2860515328, 0, 2984, 2860515328, 0, 2996, 2860515328, 0, 2996, 2860515328, 0, 2996, 2860515328, 0, 2996, 2860515328, 0, 2996, 2860515328, 0, 3000, 2860515328, 0, 3000, 2860515328, 0, 3000, 2860515328, 0, 3000, 2860515328, 0, 3000, 2860515328, 0, 7168, 349525, 0, 7168, 349525, 0, 7168, 349525, 0, 7168, 349525, 0, 7168, 349525, 0, 7168, 349525, 0, 7168, 349525, 0, 7168, 349525, 0, 7168, 349525, 0, 7168, 349525, 0, 7744, 349525, 0, 7744, 349525, 0, 7744, 349525, 0, 7744, 349525, 0, 7744, 349525, 0, 7744, 349525, 0, 7744, 349525, 0, 7744, 349525, 0, 7744, 349525, 0, 7744, 349525, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756384925125226184_484_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756384925125226184_484_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9e397faf --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756384925125226184_484_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,426 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 26))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 7))) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + 
WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (307 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (325 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (334 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: 
[1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3072, 272696336, 0, 3072, 272696336, 0, 3072, 272696336, 0, 3072, 272696336, 0, 3072, 272696336, 0, 3392, 613566756, 0, 3392, 613566756, 0, 3392, 613566756, 0, 3392, 613566756, 0, 3392, 613566756, 0, 3392, 613566756, 0, 3392, 613566756, 0, 3392, 613566756, 0, 3392, 613566756, 0, 3392, 613566756, 0, 4224, 1, 0, 4800, 268501008, 0, 4800, 268501008, 0, 4800, 268501008, 0, 10688, 64, 0, 13440, 67125252, 0, 13440, 67125252, 0, 13440, 67125252, 0, 13888, 559240, 0, 13888, 559240, 0, 13888, 559240, 0, 13888, 559240, 0, 13888, 559240, 0, 15232, 1342177285, 0, 15232, 1342177285, 0, 15232, 1342177285, 0, 15232, 1342177285, 0, 20800, 1342177301, 0, 20800, 1342177301, 0, 20800, 1342177301, 0, 20800, 1342177301, 0, 20800, 1342177301, 0, 3072, 272696336, 0, 3072, 272696336, 0, 3072, 272696336, 0, 3072, 272696336, 0, 3072, 272696336, 0, 3392, 613566756, 0, 3392, 613566756, 0, 3392, 613566756, 0, 3392, 613566756, 0, 3392, 613566756, 0, 3392, 613566756, 0, 3392, 613566756, 0, 3392, 613566756, 0, 3392, 613566756, 0, 3392, 613566756, 0, 4224, 1, 0, 4800, 268501008, 0, 4800, 268501008, 0, 4800, 268501008, 0, 10688, 64, 0, 13440, 67125252, 0, 13440, 67125252, 0, 13440, 67125252, 0, 13888, 559240, 0, 13888, 559240, 0, 13888, 559240, 0, 13888, 559240, 0, 13888, 559240, 0, 15232, 1342177285, 0, 15232, 1342177285, 0, 15232, 1342177285, 0, 15232, 1342177285, 0, 20800, 1342177301, 0, 20800, 1342177301, 0, 20800, 1342177301, 0, 20800, 1342177301, 0, 20800, 1342177301, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756384974795873514_487_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756384974795873514_487_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..aad48ec5 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756384974795873514_487_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 1048576, 0, 1216, 
134283520, 0, 1216, 134283520, 0, 1216, 134283520, 0, 1600, 1048576, 0, 1216, 134283520, 0, 1216, 134283520, 0, 1216, 134283520, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756384974989254337_488_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756384974989254337_488_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fc3c80a5 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756384974989254337_488_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,171 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 18)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 9) || 
(WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [9600, 262144, 0, 9600, 262144, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: 
+ - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756384975160699414_489_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756384975160699414_489_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c0b2f679 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756384975160699414_489_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,105 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385245227757787_494_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385245227757787_494_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cdec7b27 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385245227757787_494_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,116 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 11))) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((77 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4228, 4, 0, 4232, 4, 0, 4244, 4, 0, 4248, 4, 0, 4260, 4, 0, 4264, 4, 0, 4228, 4, 0, 4232, 4, 0, 4244, 4, 0, 4248, 4, 0, 4260, 4, 0, 4264, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385245729146486_495_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385245729146486_495_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4143a6a3 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385245729146486_495_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,363 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 19)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 20)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29))) { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 25)) { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 17)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 13)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2560, 272696336, 0, 2560, 272696336, 0, 2560, 272696336, 0, 2560, 272696336, 0, 2560, 272696336, 0, 2880, 613566756, 0, 2880, 613566756, 0, 2880, 613566756, 0, 2880, 613566756, 0, 2880, 613566756, 0, 2880, 613566756, 0, 2880, 613566756, 0, 2880, 613566756, 0, 2880, 613566756, 0, 2880, 613566756, 0, 12864, 4195328, 0, 12864, 4195328, 0, 14400, 67108864, 0, 16576, 559240, 0, 16576, 559240, 0, 16576, 559240, 0, 16576, 559240, 0, 16576, 559240, 0, 2560, 272696336, 0, 2560, 272696336, 0, 2560, 272696336, 0, 2560, 272696336, 0, 2560, 272696336, 0, 2880, 613566756, 0, 2880, 613566756, 0, 2880, 613566756, 0, 2880, 613566756, 0, 2880, 613566756, 0, 2880, 613566756, 0, 2880, 613566756, 0, 2880, 613566756, 0, 2880, 613566756, 0, 2880, 613566756, 0, 12864, 4195328, 0, 12864, 4195328, 0, 14400, 67108864, 0, 16576, 559240, 0, 16576, 559240, 0, 16576, 559240, 0, 16576, 559240, 0, 16576, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385247181971175_496_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385247181971175_496_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3c0f8b0a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385247181971175_496_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,142 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + } else { + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 480 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 
1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 
1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2960, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2964, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2976, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0, 2980, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385251424802929_497_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385251424802929_497_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9eed63bb --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385251424802929_497_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,177 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 27))) { + if ((WaveGetLaneIndex() == 17)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4544, 3758096639, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6016, 2860515370, 0, 6016, 2860515370, 0, 6016, 2860515370, 0, 6016, 2860515370, 0, 6016, 2860515370, 0, 6016, 2860515370, 0, 6016, 2860515370, 0, 6016, 2860515370, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 4544, 3758096639, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 
6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6400, 1431655765, 0, 6016, 2860515370, 0, 6016, 2860515370, 0, 6016, 2860515370, 0, 6016, 2860515370, 0, 6016, 2860515370, 0, 6016, 2860515370, 0, 6016, 2860515370, 0, 6016, 2860515370, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385303938978249_500_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385303938978249_500_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d3ccc857 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385303938978249_500_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,382 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 26))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 15))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 22))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (counter0 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 10))) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((311 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() 
== 9)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((336 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((346 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((355 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((360 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((367 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 13))) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((386 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 17)) { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (396 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (403 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (407 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (417 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (426 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if 
(((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (446 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (455 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 420 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [17664, 67108868, 0, 17664, 67108868, 0, 17680, 67108868, 0, 17680, 67108868, 0, 17696, 67108868, 0, 17696, 67108868, 0, 23040, 1145324612, 0, 23040, 1145324612, 0, 23040, 1145324612, 0, 23040, 1145324612, 0, 23040, 1145324612, 0, 23040, 1145324612, 0, 23040, 1145324612, 0, 23040, 1145324612, 0, 23056, 1145324612, 0, 23056, 1145324612, 0, 23056, 1145324612, 0, 23056, 1145324612, 0, 23056, 1145324612, 0, 23056, 1145324612, 0, 23056, 1145324612, 0, 23056, 1145324612, 0, 23072, 1145324612, 0, 23072, 1145324612, 0, 23072, 1145324612, 0, 23072, 1145324612, 0, 23072, 1145324612, 0, 23072, 1145324612, 0, 23072, 1145324612, 0, 23072, 1145324612, 0, 26688, 85, 0, 26688, 85, 0, 26688, 85, 0, 26688, 85, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 
27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 28544, 85, 0, 28544, 85, 0, 28544, 85, 0, 28544, 85, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 17664, 67108868, 0, 17664, 67108868, 0, 17680, 67108868, 0, 17680, 67108868, 0, 17696, 67108868, 0, 17696, 67108868, 0, 23040, 1145324612, 0, 23040, 1145324612, 0, 23040, 1145324612, 0, 23040, 1145324612, 0, 23040, 1145324612, 0, 23040, 1145324612, 0, 23040, 1145324612, 0, 23040, 1145324612, 0, 23056, 1145324612, 0, 23056, 1145324612, 0, 23056, 1145324612, 0, 23056, 1145324612, 0, 23056, 1145324612, 0, 23056, 1145324612, 0, 23056, 1145324612, 0, 23056, 1145324612, 0, 23072, 1145324612, 0, 23072, 1145324612, 0, 23072, 1145324612, 0, 23072, 1145324612, 0, 23072, 1145324612, 0, 23072, 1145324612, 0, 23072, 1145324612, 0, 23072, 1145324612, 0, 26688, 85, 0, 26688, 85, 0, 26688, 85, 0, 26688, 85, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 27264, 1431655765, 0, 28544, 85, 0, 28544, 85, 0, 28544, 85, 0, 28544, 85, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 1431655765, 0, 29120, 
1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385309866711391_501_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385309866711391_501_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..29e1a3b3 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385309866711391_501_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,161 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 17))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 22))) { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 28)) 
|| (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 23)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 5376, 8, 0, 7168, 8390656, 0, 7168, 8390656, 0, 576, 17, 0, 576, 17, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 4736, 1145324612, 0, 5376, 8, 0, 7168, 8390656, 0, 7168, 8390656, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385310525939311_502_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385310525939311_502_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f97b9c5e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385310525939311_502_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,321 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((128 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 31))) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 6) || 
(WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (295 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 324 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2304, 1073741828, 0, 2304, 1073741828, 0, 2944, 64, 0, 6144, 1073741828, 0, 6144, 1073741828, 0, 8192, 71582720, 0, 8192, 71582720, 0, 8192, 71582720, 0, 8192, 71582720, 0, 8192, 71582720, 0, 8196, 71582720, 0, 8196, 71582720, 0, 8196, 71582720, 0, 8196, 71582720, 0, 8196, 71582720, 0, 8208, 71582720, 0, 8208, 71582720, 0, 8208, 71582720, 0, 8208, 71582720, 0, 8208, 71582720, 0, 8212, 71582720, 0, 8212, 71582720, 0, 8212, 71582720, 0, 8212, 
71582720, 0, 8212, 71582720, 0, 8832, 71303168, 0, 8832, 71303168, 0, 8848, 71303168, 0, 8848, 71303168, 0, 9472, 559240, 0, 9472, 559240, 0, 9472, 559240, 0, 9472, 559240, 0, 9472, 559240, 0, 10368, 85, 0, 10368, 85, 0, 10368, 85, 0, 10368, 85, 0, 14784, 128, 0, 15488, 2818572298, 0, 15488, 2818572298, 0, 15488, 2818572298, 0, 15488, 2818572298, 0, 15488, 2818572298, 0, 16704, 136347656, 0, 16704, 136347656, 0, 16704, 136347656, 0, 16704, 136347656, 0, 20608, 2147483690, 0, 20608, 2147483690, 0, 20608, 2147483690, 0, 20608, 2147483690, 0, 576, 17, 0, 576, 17, 0, 2304, 1073741828, 0, 2304, 1073741828, 0, 2944, 64, 0, 6144, 1073741828, 0, 6144, 1073741828, 0, 8192, 71582720, 0, 8192, 71582720, 0, 8192, 71582720, 0, 8192, 71582720, 0, 8192, 71582720, 0, 8196, 71582720, 0, 8196, 71582720, 0, 8196, 71582720, 0, 8196, 71582720, 0, 8196, 71582720, 0, 8208, 71582720, 0, 8208, 71582720, 0, 8208, 71582720, 0, 8208, 71582720, 0, 8208, 71582720, 0, 8212, 71582720, 0, 8212, 71582720, 0, 8212, 71582720, 0, 8212, 71582720, 0, 8212, 71582720, 0, 8832, 71303168, 0, 8832, 71303168, 0, 8848, 71303168, 0, 8848, 71303168, 0, 9472, 559240, 0, 9472, 559240, 0, 9472, 559240, 0, 9472, 559240, 0, 9472, 559240, 0, 10368, 85, 0, 10368, 85, 0, 10368, 85, 0, 10368, 85, 0, 14784, 128, 0, 15488, 2818572298, 0, 15488, 2818572298, 0, 15488, 2818572298, 0, 15488, 2818572298, 0, 15488, 2818572298, 0, 16704, 136347656, 0, 16704, 136347656, 0, 16704, 136347656, 0, 16704, 136347656, 0, 20608, 2147483690, 0, 20608, 2147483690, 0, 20608, 2147483690, 0, 20608, 2147483690, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385349677189896_503_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385349677189896_503_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..919f04b7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385349677189896_503_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,99 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1808, 1049600, 0, 1808, 1049600, 0, 1824, 1049600, 0, 1824, 1049600, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 1808, 1049600, 0, 1808, 1049600, 0, 1824, 1049600, 0, 1824, 1049600, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0, 2560, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385350180564091_504_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385350180564091_504_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..31da1f9f --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385350180564091_504_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,173 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 6))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 27))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 30))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((151 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 282 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3072, 1, 0, 2688, 32, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2048, 50331648, 0, 2048, 50331648, 0, 1664, 5592320, 0, 1664, 5592320, 0, 1664, 5592320, 0, 1664, 5592320, 0, 1664, 5592320, 0, 1664, 5592320, 0, 1664, 5592320, 0, 1664, 5592320, 0, 6464, 1090519041, 0, 6464, 1090519041, 0, 6464, 1090519041, 0, 7616, 3623878659, 0, 7616, 
3623878659, 0, 7616, 3623878659, 0, 7616, 3623878659, 0, 7616, 3623878659, 0, 7616, 3623878659, 0, 8784, 3221225483, 0, 8784, 3221225483, 0, 8784, 3221225483, 0, 8784, 3221225483, 0, 8784, 3221225483, 0, 8800, 3221225483, 0, 8800, 3221225483, 0, 8800, 3221225483, 0, 8800, 3221225483, 0, 8800, 3221225483, 0, 11136, 37749120, 0, 11136, 37749120, 0, 11136, 37749120, 0, 11136, 37749120, 0, 3072, 1, 0, 2688, 32, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2432, 4227858654, 0, 2048, 50331648, 0, 2048, 50331648, 0, 1664, 5592320, 0, 1664, 5592320, 0, 1664, 5592320, 0, 1664, 5592320, 0, 1664, 5592320, 0, 1664, 5592320, 0, 1664, 5592320, 0, 1664, 5592320, 0, 6464, 1090519041, 0, 6464, 1090519041, 0, 6464, 1090519041, 0, 7616, 3623878659, 0, 7616, 3623878659, 0, 7616, 3623878659, 0, 7616, 3623878659, 0, 7616, 3623878659, 0, 7616, 3623878659, 0, 8784, 3221225483, 0, 8784, 3221225483, 0, 8784, 3221225483, 0, 8784, 3221225483, 0, 8784, 3221225483, 0, 8800, 3221225483, 0, 8800, 3221225483, 0, 8800, 3221225483, 0, 8800, 3221225483, 0, 8800, 3221225483, 0, 11136, 37749120, 0, 11136, 37749120, 0, 11136, 37749120, 0, 11136, 37749120, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385353904638611_505_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385353904638611_505_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d593aee6 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385353904638611_505_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,254 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((34 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 28))) { + if (((WaveGetLaneIndex() == 12) || 
(WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((178 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((197 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((214 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((221 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 384 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5696, 256, 0, 5712, 256, 0, 8128, 838860, 0, 8128, 838860, 0, 8128, 838860, 0, 8128, 838860, 0, 8128, 838860, 0, 8128, 838860, 0, 8128, 838860, 0, 8128, 838860, 0, 8128, 838860, 0, 8128, 838860, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 
2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 10240, 2862612480, 0, 10240, 2862612480, 0, 10240, 2862612480, 0, 10240, 2862612480, 0, 10240, 2862612480, 0, 10240, 2862612480, 0, 10256, 2862612480, 0, 10256, 2862612480, 0, 10256, 2862612480, 0, 10256, 2862612480, 0, 10256, 2862612480, 0, 10256, 2862612480, 0, 10272, 2862612480, 0, 10272, 2862612480, 0, 10272, 2862612480, 0, 10272, 2862612480, 0, 10272, 2862612480, 0, 10272, 2862612480, 0, 12608, 134217728, 0, 12612, 134217728, 0, 12624, 134217728, 0, 12628, 134217728, 0, 12640, 134217728, 0, 12644, 134217728, 0, 13696, 33554464, 0, 13696, 33554464, 0, 13700, 33554464, 0, 13700, 33554464, 0, 13712, 33554464, 0, 13712, 33554464, 0, 13716, 33554464, 0, 13716, 33554464, 0, 13728, 33554464, 0, 13728, 33554464, 0, 13732, 33554464, 0, 13732, 33554464, 0, 5696, 256, 0, 5712, 256, 0, 8128, 838860, 0, 8128, 838860, 0, 8128, 838860, 0, 8128, 838860, 0, 8128, 838860, 0, 8128, 838860, 0, 8128, 838860, 0, 8128, 838860, 0, 8128, 838860, 0, 8128, 838860, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 9280, 2863311530, 0, 10240, 2862612480, 0, 10240, 2862612480, 0, 10240, 2862612480, 0, 10240, 2862612480, 0, 10240, 2862612480, 0, 10240, 2862612480, 0, 10256, 2862612480, 0, 10256, 2862612480, 0, 10256, 2862612480, 0, 10256, 2862612480, 0, 10256, 2862612480, 0, 10256, 2862612480, 0, 10272, 2862612480, 0, 10272, 2862612480, 0, 10272, 2862612480, 0, 10272, 2862612480, 0, 10272, 2862612480, 0, 10272, 2862612480, 0, 12608, 134217728, 0, 12612, 134217728, 0, 12624, 134217728, 0, 12628, 134217728, 0, 12640, 134217728, 
0, 12644, 134217728, 0, 13696, 33554464, 0, 13696, 33554464, 0, 13700, 33554464, 0, 13700, 33554464, 0, 13712, 33554464, 0, 13712, 33554464, 0, 13716, 33554464, 0, 13716, 33554464, 0, 13728, 33554464, 0, 13728, 33554464, 0, 13732, 33554464, 0, 13732, 33554464, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385381186694457_506_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385381186694457_506_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2f312fba --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385381186694457_506_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 5)) { + result = (result + 
WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4096, 4456464, 0, 4096, 4456464, 0, 4096, 4456464, 0, 3712, 15, 0, 3712, 15, 0, 3712, 15, 0, 3712, 15, 0, 3456, 2147484128, 0, 3456, 2147484128, 0, 3456, 2147484128, 0, 3456, 2147484128, 0, 3456, 2147484128, 0, 3072, 1427198976, 0, 3072, 1427198976, 0, 3072, 1427198976, 0, 3072, 1427198976, 0, 3072, 1427198976, 0, 3072, 1427198976, 0, 3072, 
1427198976, 0, 3072, 1427198976, 0, 3072, 1427198976, 0, 2688, 536870912, 0, 4096, 4456464, 0, 4096, 4456464, 0, 4096, 4456464, 0, 3712, 15, 0, 3712, 15, 0, 3712, 15, 0, 3712, 15, 0, 3456, 2147484128, 0, 3456, 2147484128, 0, 3456, 2147484128, 0, 3456, 2147484128, 0, 3456, 2147484128, 0, 3072, 1427198976, 0, 3072, 1427198976, 0, 3072, 1427198976, 0, 3072, 1427198976, 0, 3072, 1427198976, 0, 3072, 1427198976, 0, 3072, 1427198976, 0, 3072, 1427198976, 0, 3072, 1427198976, 0, 2688, 536870912, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385556112650660_508_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385556112650660_508_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b46d900b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385556112650660_508_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385752506609421_513_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385752506609421_513_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..473612e4 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385752506609421_513_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,141 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 204 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4160, 1342177301, 0, 4160, 1342177301, 0, 4160, 1342177301, 0, 4160, 1342177301, 0, 4160, 1342177301, 0, 4176, 1342177301, 0, 4176, 1342177301, 0, 4176, 1342177301, 0, 4176, 1342177301, 0, 4176, 1342177301, 0, 4192, 1342177301, 0, 4192, 1342177301, 0, 4192, 1342177301, 0, 4192, 1342177301, 0, 4192, 1342177301, 0, 4800, 67108864, 0, 5760, 15, 0, 5760, 15, 0, 5760, 15, 0, 5760, 15, 0, 5776, 15, 0, 5776, 15, 0, 5776, 15, 0, 5776, 15, 0, 6208, 31, 0, 6208, 31, 0, 6208, 31, 0, 6208, 31, 0, 6208, 31, 0, 6224, 31, 0, 6224, 31, 0, 6224, 31, 0, 6224, 31, 0, 6224, 31, 0, 4160, 1342177301, 0, 4160, 1342177301, 0, 4160, 1342177301, 0, 4160, 1342177301, 0, 4160, 1342177301, 0, 4176, 1342177301, 0, 4176, 1342177301, 0, 4176, 1342177301, 0, 4176, 1342177301, 0, 4176, 1342177301, 0, 4192, 1342177301, 0, 4192, 1342177301, 0, 4192, 1342177301, 0, 4192, 1342177301, 0, 4192, 1342177301, 0, 4800, 67108864, 0, 5760, 15, 0, 5760, 15, 0, 5760, 15, 0, 5760, 15, 0, 5776, 15, 0, 5776, 15, 0, 5776, 15, 0, 5776, 15, 0, 6208, 31, 0, 6208, 31, 0, 6208, 31, 0, 6208, 31, 0, 6208, 31, 0, 6224, 31, 0, 6224, 31, 0, 6224, 31, 0, 6224, 31, 0, 6224, 31, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385755102600431_514_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385755102600431_514_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..41c0d87e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385755102600431_514_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,263 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if 
((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 17)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = 
(result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 576 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2624, 262144, 0, 2240, 16778272, 0, 2240, 16778272, 0, 2240, 16778272, 0, 1984, 
1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 5248, 16908292, 0, 5248, 16908292, 0, 5248, 16908292, 0, 4864, 524288, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 6336, 286331153, 0, 6336, 286331153, 0, 6336, 286331153, 0, 6336, 286331153, 0, 6336, 286331153, 0, 6336, 286331153, 0, 6336, 286331153, 0, 6336, 286331153, 0, 7296, 285212672, 0, 7296, 285212672, 0, 7312, 285212672, 0, 7312, 285212672, 0, 7328, 285212672, 0, 7328, 285212672, 0, 7744, 286261248, 0, 7744, 286261248, 0, 7744, 286261248, 0, 7760, 286261248, 0, 7760, 286261248, 0, 7760, 286261248, 0, 7776, 286261248, 0, 7776, 286261248, 0, 7776, 286261248, 0, 11280, 1145324612, 0, 11280, 1145324612, 0, 11280, 1145324612, 0, 11280, 1145324612, 0, 11280, 1145324612, 0, 11280, 1145324612, 0, 11280, 1145324612, 0, 11280, 1145324612, 0, 11296, 1145324612, 0, 11296, 1145324612, 0, 11296, 1145324612, 0, 11296, 1145324612, 0, 11296, 1145324612, 0, 11296, 1145324612, 0, 11296, 1145324612, 0, 11296, 1145324612, 0, 12496, 572662306, 0, 12496, 572662306, 0, 12496, 572662306, 0, 12496, 572662306, 0, 12496, 572662306, 0, 12496, 572662306, 0, 12496, 572662306, 0, 12496, 572662306, 0, 12512, 572662306, 0, 12512, 572662306, 0, 12512, 572662306, 0, 12512, 572662306, 0, 12512, 572662306, 0, 12512, 572662306, 0, 12512, 572662306, 0, 12512, 572662306, 0, 12928, 559240, 0, 12928, 559240, 0, 12928, 559240, 0, 12928, 559240, 0, 12928, 559240, 0, 2624, 262144, 0, 2240, 16778272, 0, 2240, 16778272, 0, 2240, 16778272, 0, 1984, 1414615381, 0, 1984, 
1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 1984, 1414615381, 0, 5248, 16908292, 0, 5248, 16908292, 0, 5248, 16908292, 0, 4864, 524288, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 4608, 4269801723, 0, 6336, 286331153, 0, 6336, 286331153, 0, 6336, 286331153, 0, 6336, 286331153, 0, 6336, 286331153, 0, 6336, 286331153, 0, 6336, 286331153, 0, 6336, 286331153, 0, 7296, 285212672, 0, 7296, 285212672, 0, 7312, 285212672, 0, 7312, 285212672, 0, 7328, 285212672, 0, 7328, 285212672, 0, 7744, 286261248, 0, 7744, 286261248, 0, 7744, 286261248, 0, 7760, 286261248, 0, 7760, 286261248, 0, 7760, 286261248, 0, 7776, 286261248, 0, 7776, 286261248, 0, 7776, 286261248, 0, 11280, 1145324612, 0, 11280, 1145324612, 0, 11280, 1145324612, 0, 11280, 1145324612, 0, 11280, 1145324612, 0, 11280, 1145324612, 0, 11280, 1145324612, 0, 11280, 1145324612, 0, 11296, 1145324612, 0, 11296, 1145324612, 0, 11296, 1145324612, 0, 11296, 1145324612, 0, 11296, 1145324612, 0, 11296, 1145324612, 0, 11296, 1145324612, 0, 11296, 1145324612, 0, 12496, 572662306, 0, 12496, 572662306, 0, 12496, 572662306, 0, 12496, 572662306, 0, 12496, 572662306, 0, 12496, 572662306, 0, 12496, 572662306, 0, 12496, 572662306, 0, 12512, 572662306, 0, 12512, 572662306, 0, 12512, 572662306, 0, 12512, 572662306, 0, 12512, 572662306, 0, 12512, 572662306, 0, 12512, 572662306, 0, 12512, 572662306, 0, 12928, 559240, 0, 12928, 559240, 0, 12928, 559240, 0, 12928, 559240, 0, 12928, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385818558582751_516_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385818558582751_516_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0e81c1fa --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385818558582751_516_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,209 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 28))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 738 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5120, 268436480, 0, 5120, 268436480, 0, 7936, 959, 0, 7936, 959, 0, 7936, 959, 0, 7936, 959, 0, 7936, 959, 0, 7936, 959, 0, 7936, 959, 0, 7936, 959, 0, 7936, 959, 0, 9024, 4009754624, 0, 9024, 4009754624, 0, 9024, 4009754624, 0, 9024, 4009754624, 0, 9024, 4009754624, 0, 9024, 4009754624, 0, 9024, 4009754624, 0, 9040, 4009754624, 0, 9040, 4009754624, 0, 9040, 4009754624, 0, 9040, 4009754624, 0, 9040, 4009754624, 0, 9040, 4009754624, 0, 9040, 4009754624, 0, 9056, 4009754624, 0, 9056, 4009754624, 0, 9056, 4009754624, 0, 9056, 4009754624, 0, 9056, 4009754624, 0, 9056, 4009754624, 0, 9056, 4009754624, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 
9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 10944, 4024434688, 0, 10944, 4024434688, 0, 10944, 4024434688, 0, 10944, 4024434688, 0, 10944, 4024434688, 0, 10944, 4024434688, 0, 10944, 4024434688, 0, 10944, 4024434688, 0, 10944, 4024434688, 0, 10944, 4024434688, 0, 10960, 4024434688, 0, 10960, 4024434688, 0, 10960, 4024434688, 0, 10960, 4024434688, 0, 10960, 4024434688, 0, 10960, 4024434688, 0, 10960, 4024434688, 0, 10960, 4024434688, 0, 10960, 4024434688, 0, 10960, 4024434688, 0, 10976, 4024434688, 0, 10976, 4024434688, 0, 10976, 4024434688, 0, 10976, 4024434688, 0, 10976, 4024434688, 0, 10976, 4024434688, 0, 10976, 4024434688, 0, 10976, 4024434688, 0, 10976, 4024434688, 0, 10976, 4024434688, 0, 11584, 959, 0, 11584, 959, 0, 11584, 959, 0, 11584, 959, 0, 11584, 959, 0, 11584, 959, 0, 11584, 959, 0, 11584, 959, 0, 11584, 959, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5120, 268436480, 0, 5120, 268436480, 0, 7936, 959, 0, 7936, 959, 0, 7936, 959, 0, 7936, 959, 0, 7936, 959, 0, 7936, 959, 0, 7936, 959, 0, 7936, 959, 0, 7936, 959, 0, 9024, 4009754624, 0, 9024, 4009754624, 0, 9024, 4009754624, 0, 9024, 4009754624, 0, 9024, 4009754624, 0, 9024, 4009754624, 0, 9024, 4009754624, 0, 9040, 4009754624, 0, 9040, 4009754624, 0, 9040, 4009754624, 0, 9040, 4009754624, 0, 9040, 4009754624, 0, 9040, 4009754624, 0, 9040, 4009754624, 0, 9056, 4009754624, 0, 9056, 4009754624, 0, 9056, 4009754624, 0, 9056, 4009754624, 0, 9056, 4009754624, 0, 9056, 4009754624, 0, 9056, 4009754624, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9920, 2863311530, 0, 9936, 2863311530, 0, 
9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9936, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 9952, 2863311530, 0, 10944, 4024434688, 0, 10944, 4024434688, 0, 10944, 4024434688, 0, 10944, 4024434688, 0, 10944, 4024434688, 0, 10944, 4024434688, 0, 10944, 4024434688, 0, 10944, 4024434688, 0, 10944, 4024434688, 0, 10944, 4024434688, 0, 10960, 4024434688, 0, 10960, 4024434688, 0, 10960, 4024434688, 0, 10960, 4024434688, 0, 10960, 4024434688, 0, 10960, 4024434688, 0, 10960, 4024434688, 0, 10960, 4024434688, 0, 10960, 4024434688, 0, 10960, 4024434688, 0, 10976, 4024434688, 0, 10976, 4024434688, 0, 10976, 4024434688, 0, 10976, 4024434688, 0, 10976, 4024434688, 0, 10976, 4024434688, 0, 10976, 4024434688, 0, 10976, 4024434688, 0, 10976, 4024434688, 0, 10976, 4024434688, 0, 11584, 959, 0, 11584, 959, 0, 11584, 959, 0, 11584, 959, 0, 11584, 959, 0, 11584, 959, 0, 11584, 959, 0, 11584, 959, 0, 11584, 959, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385836896170210_517_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385836896170210_517_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fd8aa93b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385836896170210_517_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,228 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + /* Compute entry point. Each lane that executes a wave intrinsic then logs a three-word record: InterlockedAdd on _wave_op_index[0] adds 3 with temp as its output argument, and temp, temp plus 1 and temp plus 2 index the _participant_bit words that receive a tag, ballot.x and ballot.y of WaveActiveBallot(1). The tag is a per-site constant shifted left by 6, OR-ed with a loop counter shifted left by 4 at sites inside a loop. */ +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { /* NOTE(review): a value taken modulo 4 is 0 to 3, so the default label at the end of this switch looks unreachable */ + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] =
(33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { /* lanes selected here have lane % 4 == 1 and are odd, so the even-lane test below never passes */ + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 7)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 28)) { /* still inside the lane < 7 branch, so this test never passes */ + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { /* lanes selected by case 2 have lane % 4 == 2 and are even, so none of the odd-lane tests in this else branch pass */ + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 29))) { + uint counter2 = 0; + while ((counter2 < 3)) { /* counter2 is incremented before it is used in a tag, and the break at counter2 == 2 ends the loop after two passes */ + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { /* lanes selected by case 3 have lane % 4 == 3 and are odd, so this test never passes */ + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { /* NOTE(review): looks unreachable, the switch operand is taken modulo 4 and cases 0 to 3 are all present */ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1]
# Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 2112, 17, 0, 2112, 17, 0, 4096, 4, 0, 4112, 4, 0, 8592, 2147483784, 0, 8592, 2147483784, 0, 8592, 2147483784, 0, 8608, 2147483784, 0, 8608, 2147483784, 0, 8608, 2147483784, 0, 10176, 2147483784, 0, 10176, 2147483784, 0, 10176, 2147483784, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 2112, 17, 0, 2112, 17, 0, 4096, 4, 0, 4112, 4, 0, 8592, 2147483784, 0, 8592, 2147483784, 0, 8592, 2147483784, 0, 8608, 2147483784, 0, 8608, 2147483784, 0, 8608, 2147483784, 0, 10176, 2147483784, 0, 10176, 2147483784, 0, 10176, 2147483784, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385839053667425_518_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385839053667425_518_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f6325ba3 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385839053667425_518_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,113 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + /* Compute entry point. Each lane that executes a wave intrinsic then logs a three-word record: InterlockedAdd on _wave_op_index[0] adds 3 with temp as its output argument, and temp, temp plus 1 and temp plus 2 index the _participant_bit words that receive a tag, ballot.x and ballot.y of WaveActiveBallot(1). The tag is a per-site constant shifted left by 6, OR-ed with counter0 shifted left by 4 and, inside the for loop, i1 shifted left by 2. */ +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); /* incremented before use, so the tags written in this loop carry counter0 values 1 to 3 */ + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 19)) { + if ((WaveGetLaneIndex() < 8)) { /* nested under the lane >= 19 test, so this never passes */ + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] =
ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { /* nested under the odd-lane test, so this never passes */ + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 30)) { /* lane 30 is even and this sits under the odd-lane test, so this never passes */ + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 33554432, 0, 992, 33554432, 0, 1008, 33554432, 0, 2896, 2818572288, 0, 2896, 2818572288, 0, 2896, 2818572288, 0, 2900, 2818572288, 0, 2900, 2818572288, 0, 2900, 2818572288, 0, 2912, 2818572288, 0, 2912, 2818572288, 0, 2912, 2818572288, 0, 2916, 2818572288, 0, 2916, 2818572288, 0, 2916, 2818572288, 0, 2928, 2818572288, 0, 2928, 2818572288, 0, 2928, 2818572288, 0, 2932, 2818572288, 0, 2932, 2818572288, 0, 2932, 2818572288, 0, 976, 33554432, 0, 992, 33554432, 0, 1008, 33554432, 0, 2896, 2818572288, 0, 2896, 2818572288, 0, 2896, 2818572288, 0, 2900, 2818572288, 0, 2900, 2818572288, 0, 2900, 2818572288, 0, 2912, 2818572288, 0, 2912, 2818572288, 0, 2912, 2818572288, 0, 2916, 2818572288, 0, 2916, 2818572288, 0, 2916, 2818572288, 0, 2928, 2818572288, 0, 2928, 2818572288, 0, 2928, 2818572288, 0, 2932, 2818572288, 0, 2932, 2818572288, 0, 2932, 2818572288, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation
+ Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385849428283168_519_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385849428283168_519_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d5cc341e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385849428283168_519_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,114 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 29)) { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while 
((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 1936, 1073741824, 
0, 1952, 1073741824, 0, 5776, 1077940240, 0, 5776, 1077940240, 0, 5776, 1077940240, 0, 5776, 1077940240, 0, 5792, 1077940240, 0, 5792, 1077940240, 0, 5792, 1077940240, 0, 5792, 1077940240, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 1936, 1073741824, 0, 1952, 1073741824, 0, 5776, 1077940240, 0, 5776, 1077940240, 0, 5776, 1077940240, 0, 5776, 1077940240, 0, 5792, 1077940240, 0, 5792, 1077940240, 0, 5792, 1077940240, 0, 5792, 1077940240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385849986613662_520_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385849986613662_520_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..39be4b60 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385849986613662_520_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,203 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + /* Compute entry point. Each lane that executes a wave intrinsic then logs a three-word record: InterlockedAdd on _wave_op_index[0] adds 3 with temp as its output argument, and temp, temp plus 1 and temp plus 2 index the _participant_bit words that receive a tag, ballot.x and ballot.y of WaveActiveBallot(1). The tag is a per-site constant shifted left by 6, OR-ed with the while-loop counter shifted left by 4 and, inside the for loop, i2 shifted left by 2. */ +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 25))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 28))) { /* neither lane is in the enclosing set 1, 9, 20, 25, so this never passes */ + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { /* nested under the even-lane test, so this never passes */ + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { /* only odd lanes reach this case, so of lanes 12 and 23 only lane 23 can pass the next test; every wave-op test nested under it (lanes 2, 12, 28; lane < 15; lane < 1 or lane >= 31) excludes lane 23 */ + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 23))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { /* still only odd lanes here, so the even-lane test below never passes */ + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((((WaveGetLaneIndex() == 6) ||
(WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 31))) { + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 24))) { /* neither lane is in the enclosing set 6, 15, 20, 31, so this never passes */ + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); /* incremented before use, so the tags written in this loop carry counter1 values 1 to 3 */ + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((194 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((205 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); +
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2624, 1048576, 0, 3840, 85, 0, 3840, 85, 0, 3840, 85, 0, 3840, 85, 0, 11216, 64, 0, 11232, 64, 0, 11248, 64, 0, 12432, 2147483648, 0, 12436, 2147483648, 0, 12448, 2147483648, 0, 12452, 2147483648, 0, 12464, 2147483648, 0, 12468, 2147483648, 0, 13136, 2147483648, 0, 13140, 2147483648, 0, 13152, 2147483648, 0, 13156, 2147483648, 0, 13168, 2147483648, 0, 13172, 2147483648, 0, 13824, 32768, 0, 2624, 1048576, 0, 3840, 85, 0, 3840, 85, 0, 3840, 85, 0, 3840, 85, 0, 11216, 64, 0, 11232, 64, 0, 11248, 64, 0, 12432, 2147483648, 0, 12436, 2147483648, 0, 12448, 2147483648, 0, 12452, 2147483648, 0, 12464, 2147483648, 0, 12468, 2147483648, 0, 13136, 2147483648, 0, 13140, 2147483648, 0, 13152, 2147483648, 0, 13156, 2147483648, 0, 13168, 2147483648, 0, 13172, 2147483648, 0, 13824, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385851572254286_521_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385851572254286_521_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..48f84b68 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385851572254286_521_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,387 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 19)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 14)) { + 
result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | 
(i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 17)) { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 24))) 
{ + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((237 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 21)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 26))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 26))) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((346 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (355 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 318 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1472, 1207959553, 0, 1472, 1207959553, 0, 1472, 1207959553, 0, 2512, 136348168, 0, 2512, 136348168, 0, 2512, 136348168, 0, 2512, 136348168, 0, 2512, 136348168, 0, 4176, 1, 0, 5056, 1207959553, 0, 5056, 1207959553, 0, 5056, 1207959553, 0, 6080, 2181570690, 0, 6080, 2181570690, 0, 6080, 2181570690, 0, 6080, 2181570690, 0, 
6080, 2181570690, 0, 6080, 2181570690, 0, 9664, 272629776, 0, 9664, 272629776, 0, 9664, 272629776, 0, 10816, 16, 0, 11520, 268435472, 0, 11520, 268435472, 0, 11840, 613566756, 0, 11840, 613566756, 0, 11840, 613566756, 0, 11840, 613566756, 0, 11840, 613566756, 0, 11840, 613566756, 0, 11840, 613566756, 0, 11840, 613566756, 0, 11840, 613566756, 0, 11840, 613566756, 0, 13312, 1, 0, 14016, 16777216, 0, 15184, 268435456, 0, 15888, 268435472, 0, 15888, 268435472, 0, 20608, 1145324612, 0, 20608, 1145324612, 0, 20608, 1145324612, 0, 20608, 1145324612, 0, 20608, 1145324612, 0, 20608, 1145324612, 0, 20608, 1145324612, 0, 20608, 1145324612, 0, 22144, 2281701384, 0, 22144, 2281701384, 0, 22144, 2281701384, 0, 22160, 2281701384, 0, 22160, 2281701384, 0, 22160, 2281701384, 0, 1472, 1207959553, 0, 1472, 1207959553, 0, 1472, 1207959553, 0, 2512, 136348168, 0, 2512, 136348168, 0, 2512, 136348168, 0, 2512, 136348168, 0, 2512, 136348168, 0, 4176, 1, 0, 5056, 1207959553, 0, 5056, 1207959553, 0, 5056, 1207959553, 0, 6080, 2181570690, 0, 6080, 2181570690, 0, 6080, 2181570690, 0, 6080, 2181570690, 0, 6080, 2181570690, 0, 6080, 2181570690, 0, 9664, 272629776, 0, 9664, 272629776, 0, 9664, 272629776, 0, 10816, 16, 0, 11520, 268435472, 0, 11520, 268435472, 0, 11840, 613566756, 0, 11840, 613566756, 0, 11840, 613566756, 0, 11840, 613566756, 0, 11840, 613566756, 0, 11840, 613566756, 0, 11840, 613566756, 0, 11840, 613566756, 0, 11840, 613566756, 0, 11840, 613566756, 0, 13312, 1, 0, 14016, 16777216, 0, 15184, 268435456, 0, 15888, 268435472, 0, 15888, 268435472, 0, 20608, 1145324612, 0, 20608, 1145324612, 0, 20608, 1145324612, 0, 20608, 1145324612, 0, 20608, 1145324612, 0, 20608, 1145324612, 0, 20608, 1145324612, 0, 20608, 1145324612, 0, 22144, 2281701384, 0, 22144, 2281701384, 0, 22144, 2281701384, 0, 22160, 2281701384, 0, 22160, 2281701384, 0, 22160, 2281701384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385900985493891_522_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385900985493891_522_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9d4aa93c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385900985493891_522_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,121 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || 
(WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 4))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 5568, 133120, 0, 5568, 133120, 0, 5584, 133120, 0, 5584, 133120, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 5568, 133120, 0, 5568, 133120, 0, 5584, 133120, 0, 5584, 
133120, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385901383939006_523_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385901383939006_523_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4fffd2bd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385901383939006_523_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,266 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 18)) { + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 22))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 2))) { + if ((((((WaveGetLaneIndex() == 1) 
|| (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((212 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29))) { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((249 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i2 == 1)) { + continue; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1227096064, 0, 768, 1227096064, 0, 768, 1227096064, 0, 768, 1227096064, 0, 768, 1227096064, 0, 1984, 1, 0, 2560, 4097, 0, 2560, 4097, 0, 2880, 4673, 0, 2880, 4673, 0, 2880, 4673, 0, 2880, 4673, 0, 3328, 32776, 0, 3328, 32776, 0, 4352, 272696336, 0, 4352, 272696336, 0, 4352, 272696336, 0, 4352, 272696336, 0, 4352, 272696336, 0, 9152, 536870912, 0, 11840, 2, 0, 13568, 1024, 0, 13584, 1024, 0, 13600, 1024, 0, 17600, 4, 0, 768, 1227096064, 0, 768, 1227096064, 0, 768, 1227096064, 0, 768, 1227096064, 0, 768, 1227096064, 0, 1984, 1, 0, 2560, 4097, 0, 2560, 4097, 0, 2880, 4673, 0, 2880, 4673, 0, 2880, 4673, 0, 2880, 4673, 0, 3328, 32776, 0, 3328, 32776, 0, 4352, 272696336, 0, 4352, 272696336, 0, 4352, 272696336, 0, 4352, 272696336, 0, 4352, 272696336, 0, 9152, 536870912, 0, 11840, 2, 0, 13568, 1024, 0, 13584, 1024, 0, 13600, 1024, 0, 17600, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385904408666102_524_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385904408666102_524_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0f9640de --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385904408666102_524_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,349 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24))) { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((229 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((238 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 2)) { + break; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2112, 16, 0, 2688, 268501008, 0, 2688, 268501008, 0, 2688, 268501008, 0, 3008, 4195328, 0, 3008, 4195328, 0, 8848, 8388608, 0, 8864, 8388608, 0, 8880, 8388608, 0, 9808, 133120, 0, 9808, 133120, 0, 9824, 133120, 0, 9824, 133120, 0, 9840, 133120, 0, 9840, 133120, 0, 10880, 85, 0, 10880, 85, 0, 10880, 85, 0, 10880, 85, 
0, 12288, 17, 0, 12288, 17, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 17152, 559240, 0, 17152, 559240, 0, 17152, 559240, 0, 17152, 559240, 0, 17152, 559240, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 2112, 16, 0, 2688, 268501008, 0, 2688, 268501008, 0, 2688, 268501008, 0, 3008, 4195328, 0, 3008, 4195328, 0, 8848, 8388608, 0, 8864, 8388608, 0, 8880, 8388608, 0, 9808, 133120, 0, 9808, 133120, 0, 9824, 133120, 0, 9824, 133120, 0, 9840, 133120, 0, 9840, 133120, 0, 10880, 85, 0, 10880, 85, 0, 10880, 85, 0, 10880, 85, 0, 12288, 17, 0, 12288, 17, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 17152, 559240, 0, 17152, 559240, 0, 17152, 559240, 0, 17152, 559240, 0, 17152, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756385913020207487_525_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756385913020207487_525_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ce54f6ac --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756385913020207487_525_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,214 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 
0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 330 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1920, 4026531847, 0, 1920, 4026531847, 0, 1920, 4026531847, 0, 1920, 4026531847, 0, 1920, 4026531847, 0, 1920, 4026531847, 0, 1920, 4026531847, 0, 1536, 504, 0, 1536, 504, 0, 1536, 504, 0, 1536, 504, 0, 1536, 504, 0, 1536, 504, 0, 1280, 234881024, 0, 1280, 234881024, 0, 1280, 234881024, 0, 2560, 17, 0, 2560, 17, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 
3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3904, 559240, 0, 3904, 559240, 0, 3904, 559240, 0, 3904, 559240, 0, 3904, 559240, 0, 5632, 1090781184, 0, 5632, 1090781184, 0, 5632, 1090781184, 0, 5648, 1090781184, 0, 5648, 1090781184, 0, 5648, 1090781184, 0, 5664, 1090781184, 0, 5664, 1090781184, 0, 5664, 1090781184, 0, 6208, 272696336, 0, 6208, 272696336, 0, 6208, 272696336, 0, 6208, 272696336, 0, 6208, 272696336, 0, 7104, 68174084, 0, 7104, 68174084, 0, 7104, 68174084, 0, 7104, 68174084, 0, 7104, 68174084, 0, 7680, 68174084, 0, 7680, 68174084, 0, 7680, 68174084, 0, 7680, 68174084, 0, 7680, 68174084, 0, 1920, 4026531847, 0, 1920, 4026531847, 0, 1920, 4026531847, 0, 1920, 4026531847, 0, 1920, 4026531847, 0, 1920, 4026531847, 0, 1920, 4026531847, 0, 1536, 504, 0, 1536, 504, 0, 1536, 504, 0, 1536, 504, 0, 1536, 504, 0, 1536, 504, 0, 1280, 234881024, 0, 1280, 234881024, 0, 1280, 234881024, 0, 2560, 17, 0, 2560, 17, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3456, 1145324612, 0, 3904, 559240, 0, 3904, 559240, 0, 3904, 559240, 0, 3904, 559240, 0, 3904, 559240, 0, 5632, 1090781184, 0, 5632, 1090781184, 0, 5632, 1090781184, 0, 5648, 1090781184, 0, 5648, 1090781184, 0, 5648, 1090781184, 0, 5664, 1090781184, 0, 5664, 1090781184, 0, 5664, 1090781184, 0, 6208, 272696336, 0, 6208, 272696336, 0, 6208, 272696336, 0, 6208, 272696336, 0, 6208, 272696336, 0, 7104, 68174084, 0, 7104, 68174084, 0, 7104, 68174084, 0, 7104, 68174084, 0, 7104, 68174084, 0, 7680, 68174084, 0, 7680, 68174084, 0, 7680, 68174084, 0, 7680, 68174084, 0, 7680, 68174084, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386206221457275_527_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386206221457275_527_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6b1d5f4c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386206221457275_527_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,231 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 3: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 31))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 30)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (i1 << 4)); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 6336, 73, 0, 6336, 73, 0, 6336, 73, 0, 6912, 272696336, 0, 6912, 272696336, 0, 6912, 272696336, 0, 6912, 272696336, 0, 6912, 272696336, 0, 13392, 292, 0, 13392, 292, 0, 13392, 292, 0, 13408, 292, 0, 13408, 292, 0, 13408, 292, 0, 13424, 292, 0, 13424, 292, 0, 13424, 292, 0, 576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 6336, 73, 0, 6336, 73, 0, 6336, 73, 0, 6912, 272696336, 0, 6912, 272696336, 0, 6912, 272696336, 0, 6912, 272696336, 0, 6912, 272696336, 0, 13392, 292, 0, 13392, 292, 0, 13392, 292, 0, 13408, 292, 0, 13408, 292, 0, 13408, 292, 0, 13424, 292, 0, 13424, 292, 0, 13424, 292, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386207771080224_528_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386207771080224_528_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2a340b21 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386207771080224_528_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,182 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1664, 15, 0, 1664, 15, 0, 1664, 15, 0, 1664, 15, 0, 1408, 33554432, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 2304, 17, 0, 2304, 17, 0, 6400, 32, 0, 6416, 32, 0, 8448, 536870914, 0, 8448, 536870914, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 
1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 9216, 559240, 0, 9216, 559240, 0, 9216, 559240, 0, 9216, 559240, 0, 9216, 559240, 0, 1664, 15, 0, 1664, 15, 0, 1664, 15, 0, 1664, 15, 0, 1408, 33554432, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 1024, 2829757088, 0, 2304, 17, 0, 2304, 17, 0, 6400, 32, 0, 6416, 32, 0, 8448, 536870914, 0, 8448, 536870914, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 9216, 559240, 0, 9216, 559240, 0, 9216, 559240, 0, 9216, 559240, 0, 9216, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386211807163809_530_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386211807163809_530_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b0ba2263 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386211807163809_530_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,196 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 25))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: 
{ + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 5504, 33554432, 0, 5520, 33554432, 0, 5536, 33554432, 0, 6016, 32, 0, 7488, 33554432, 0, 8960, 134219778, 0, 8960, 134219778, 0, 8960, 134219778, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 5504, 33554432, 0, 5520, 33554432, 0, 5536, 33554432, 0, 6016, 32, 0, 7488, 33554432, 0, 8960, 134219778, 0, 8960, 134219778, 0, 8960, 134219778, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386212413830583_531_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386212413830583_531_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ab349017 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386212413830583_531_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,402 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of 3-uint records: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] is the next free index in _participant_bit; each record reserves 3 */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 22))) { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 22)) { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 23))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 30)) { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 
6) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 14)) { + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((267 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((274 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (286 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 6) || 
(WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 23))) { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((318 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (344 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (353 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 23))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (377 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((396 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((409 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (431 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (440 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17))) { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((469 << 6) | (i6 << 4)) | (counter7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter7 == 1)) { + break; + } + } + } + } + break; + } + } + switch 
((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (482 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (491 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 276 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 16, 0, 2128, 16, 0, 10432, 1073741824, 0, 10880, 1073741824, 0, 11456, 4195328, 0, 11456, 4195328, 0, 13632, 2147483656, 0, 13632, 2147483656, 0, 13648, 2147483656, 0, 13648, 2147483656, 0, 14912, 2290089984, 0, 14912, 2290089984, 0, 14912, 2290089984, 0, 14928, 2290089984, 0, 14928, 2290089984, 0, 14928, 2290089984, 0, 17552, 1, 0, 20352, 64, 0, 20368, 64, 0, 28160, 1145324548, 0, 28160, 1145324548, 0, 28160, 1145324548, 0, 28160, 1145324548, 0, 28160, 1145324548, 0, 28160, 1145324548, 0, 28160, 1145324548, 0, 30848, 85, 0, 30848, 85, 0, 30848, 85, 0, 30848, 85, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 2112, 16, 0, 2128, 16, 0, 10432, 1073741824, 0, 10880, 1073741824, 0, 11456, 4195328, 0, 
11456, 4195328, 0, 13632, 2147483656, 0, 13632, 2147483656, 0, 13648, 2147483656, 0, 13648, 2147483656, 0, 14912, 2290089984, 0, 14912, 2290089984, 0, 14912, 2290089984, 0, 14928, 2290089984, 0, 14928, 2290089984, 0, 14928, 2290089984, 0, 17552, 1, 0, 20352, 64, 0, 20368, 64, 0, 28160, 1145324548, 0, 28160, 1145324548, 0, 28160, 1145324548, 0, 28160, 1145324548, 0, 28160, 1145324548, 0, 28160, 1145324548, 0, 28160, 1145324548, 0, 30848, 85, 0, 30848, 85, 0, 30848, 85, 0, 30848, 85, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0, 31424, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386253210266748_533_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386253210266748_533_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d4867cd0 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386253210266748_533_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,386 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of 3-uint records: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] is the next free index in _participant_bit; each record reserves 3 */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 29))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 19)) { + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 29))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((294 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); 
i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((311 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((counter1 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 432 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2112, 537002016, 0, 2112, 537002016, 0, 2112, 537002016, 0, 5312, 1073741824, 0, 5952, 4194304, 0, 9536, 4194304, 0, 14400, 67125252, 0, 14400, 67125252, 0, 14400, 67125252, 0, 14848, 559240, 0, 14848, 559240, 0, 14848, 559240, 0, 14848, 559240, 0, 14848, 559240, 0, 17664, 68174084, 0, 17664, 68174084, 0, 17664, 68174084, 0, 17664, 68174084, 0, 17664, 68174084, 0, 18832, 2684354562, 0, 18832, 2684354562, 0, 18832, 2684354562, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 
2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 576, 17, 0, 576, 17, 0, 2112, 537002016, 0, 2112, 537002016, 0, 2112, 537002016, 0, 5312, 1073741824, 0, 5952, 4194304, 0, 9536, 4194304, 0, 14400, 67125252, 0, 14400, 67125252, 0, 14400, 67125252, 0, 14848, 559240, 0, 14848, 559240, 0, 14848, 559240, 0, 14848, 559240, 0, 14848, 559240, 0, 17664, 68174084, 0, 17664, 68174084, 0, 17664, 68174084, 0, 17664, 68174084, 0, 17664, 68174084, 0, 18832, 2684354562, 0, 18832, 2684354562, 0, 18832, 2684354562, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19920, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19924, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0, 19928, 2863311530, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386262563335194_534_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386262563335194_534_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..027ed65e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386262563335194_534_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,179 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of 3-uint records: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] is the next free index in _participant_bit; each record reserves 3 */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 4096, 8, 0, 576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 4096, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386262907691960_535_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386262907691960_535_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..58919933 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386262907691960_535_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,153 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((33 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + 
case 1: { + if ((WaveGetLaneIndex() >= 22)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 624 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 4369, 0, 
1088, 4369, 0, 1088, 4369, 0, 1088, 4369, 0, 1104, 4369, 0, 1104, 4369, 0, 1104, 4369, 0, 1104, 4369, 0, 2116, 4369, 0, 2116, 4369, 0, 2116, 4369, 0, 2116, 4369, 0, 2120, 4369, 0, 2120, 4369, 0, 2120, 4369, 0, 2120, 4369, 0, 2124, 4369, 0, 2124, 4369, 0, 2124, 4369, 0, 2124, 4369, 0, 2132, 4369, 0, 2132, 4369, 0, 2132, 4369, 0, 2132, 4369, 0, 2136, 4369, 0, 2136, 4369, 0, 2136, 4369, 0, 2136, 4369, 0, 2140, 4369, 0, 2140, 4369, 0, 2140, 4369, 0, 2140, 4369, 0, 2564, 4369, 0, 2564, 4369, 0, 2564, 4369, 0, 2564, 4369, 0, 2568, 4369, 0, 2568, 4369, 0, 2568, 4369, 0, 2568, 4369, 0, 2572, 4369, 0, 2572, 4369, 0, 2572, 4369, 0, 2572, 4369, 0, 2580, 4369, 0, 2580, 4369, 0, 2580, 4369, 0, 2580, 4369, 0, 2584, 4369, 0, 2584, 4369, 0, 2584, 4369, 0, 2584, 4369, 0, 2588, 4369, 0, 2588, 4369, 0, 2588, 4369, 0, 2588, 4369, 0, 3200, 855638016, 0, 3200, 855638016, 0, 3200, 855638016, 0, 3200, 855638016, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 1088, 4369, 0, 1088, 4369, 0, 1088, 4369, 0, 1088, 4369, 0, 1104, 4369, 0, 1104, 4369, 0, 1104, 4369, 0, 1104, 4369, 0, 2116, 4369, 0, 2116, 4369, 0, 2116, 4369, 0, 2116, 4369, 0, 2120, 4369, 0, 2120, 4369, 0, 2120, 4369, 0, 2120, 
4369, 0, 2124, 4369, 0, 2124, 4369, 0, 2124, 4369, 0, 2124, 4369, 0, 2132, 4369, 0, 2132, 4369, 0, 2132, 4369, 0, 2132, 4369, 0, 2136, 4369, 0, 2136, 4369, 0, 2136, 4369, 0, 2136, 4369, 0, 2140, 4369, 0, 2140, 4369, 0, 2140, 4369, 0, 2140, 4369, 0, 2564, 4369, 0, 2564, 4369, 0, 2564, 4369, 0, 2564, 4369, 0, 2568, 4369, 0, 2568, 4369, 0, 2568, 4369, 0, 2568, 4369, 0, 2572, 4369, 0, 2572, 4369, 0, 2572, 4369, 0, 2572, 4369, 0, 2580, 4369, 0, 2580, 4369, 0, 2580, 4369, 0, 2580, 4369, 0, 2584, 4369, 0, 2584, 4369, 0, 2584, 4369, 0, 2584, 4369, 0, 2588, 4369, 0, 2588, 4369, 0, 2588, 4369, 0, 2588, 4369, 0, 3200, 855638016, 0, 3200, 855638016, 0, 3200, 855638016, 0, 3200, 855638016, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386268634666580_536_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386268634666580_536_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..10722811 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386268634666580_536_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,128 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 21)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || 
(WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386268865447773_537_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386268865447773_537_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..576bda33 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386268865447773_537_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + 
Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386269062181623_538_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386269062181623_538_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..428d4976 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386269062181623_538_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,211 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 10)) { + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 
23))) { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((159 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 2)) { + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + 
+#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 306 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 3008, 8, 0, 3024, 8, 0, 3840, 73, 0, 3840, 73, 0, 3840, 73, 0, 8960, 68174084, 0, 8960, 68174084, 0, 8960, 68174084, 0, 8960, 68174084, 0, 8960, 68174084, 0, 8976, 68174084, 0, 8976, 68174084, 0, 8976, 68174084, 0, 8976, 68174084, 0, 8976, 68174084, 0, 8992, 68174084, 0, 8992, 68174084, 0, 8992, 68174084, 0, 8992, 68174084, 0, 8992, 68174084, 0, 10176, 603979780, 0, 10176, 603979780, 0, 10176, 603979780, 0, 10180, 603979780, 0, 10180, 603979780, 0, 10180, 603979780, 0, 10184, 603979780, 0, 10184, 603979780, 0, 10184, 603979780, 0, 10192, 603979780, 0, 10192, 603979780, 0, 10192, 603979780, 0, 10196, 603979780, 0, 10196, 603979780, 0, 10196, 603979780, 0, 10200, 603979780, 0, 10200, 603979780, 0, 10200, 603979780, 0, 10208, 603979780, 0, 10208, 603979780, 0, 10208, 603979780, 0, 10212, 603979780, 0, 10212, 603979780, 0, 10212, 603979780, 0, 10216, 603979780, 0, 10216, 603979780, 0, 10216, 603979780, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 3008, 8, 0, 3024, 8, 0, 3840, 73, 0, 3840, 73, 0, 3840, 73, 0, 8960, 68174084, 0, 8960, 68174084, 0, 8960, 68174084, 0, 8960, 68174084, 0, 8960, 68174084, 0, 8976, 68174084, 0, 8976, 68174084, 0, 8976, 68174084, 0, 8976, 68174084, 0, 8976, 68174084, 0, 8992, 68174084, 0, 8992, 68174084, 0, 8992, 68174084, 0, 8992, 68174084, 0, 8992, 68174084, 0, 10176, 603979780, 0, 10176, 603979780, 0, 10176, 603979780, 0, 10180, 603979780, 0, 10180, 603979780, 0, 10180, 603979780, 0, 10184, 603979780, 0, 10184, 603979780, 0, 10184, 603979780, 0, 10192, 603979780, 0, 10192, 603979780, 0, 10192, 603979780, 0, 10196, 603979780, 0, 10196, 603979780, 0, 10196, 603979780, 0, 10200, 603979780, 0, 10200, 
603979780, 0, 10200, 603979780, 0, 10208, 603979780, 0, 10208, 603979780, 0, 10208, 603979780, 0, 10212, 603979780, 0, 10212, 603979780, 0, 10212, 603979780, 0, 10216, 603979780, 0, 10216, 603979780, 0, 10216, 603979780, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386275123412898_539_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386275123412898_539_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3c0c15bd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386275123412898_539_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,182 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 30))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((80 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((112 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((136 << 6) | (i0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 2))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((174 << 6) | (i0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((197 << 6) | (i0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((i0 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 396 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 7616, 1073741824, 0, 7632, 1073741824, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 12608, 131072, 0, 12612, 131072, 0, 12624, 131072, 0, 12628, 131072, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 7616, 1073741824, 0, 7632, 
1073741824, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8704, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8708, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8720, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 8724, 4294705152, 0, 12608, 131072, 0, 12612, 131072, 0, 12624, 131072, 0, 12628, 131072, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386297670375712_540_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386297670375712_540_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..59e329da --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386297670375712_540_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,247 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 6)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { 
+ if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 6))) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 3456, 1, 0, 4864, 1, 0, 6528, 4218880, 0, 6528, 4218880, 0, 6528, 4218880, 0, 8000, 35782912, 0, 8000, 35782912, 0, 8000, 35782912, 0, 8000, 35782912, 0, 8640, 73, 0, 8640, 73, 0, 8640, 73, 0, 12544, 65536, 0, 14400, 613566756, 0, 14400, 613566756, 0, 14400, 613566756, 0, 14400, 613566756, 0, 14400, 613566756, 0, 14400, 613566756, 0, 14400, 613566756, 0, 
14400, 613566756, 0, 14400, 613566756, 0, 14400, 613566756, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 1472, 1431655765, 0, 3456, 1, 0, 4864, 1, 0, 6528, 4218880, 0, 6528, 4218880, 0, 6528, 4218880, 0, 8000, 35782912, 0, 8000, 35782912, 0, 8000, 35782912, 0, 8000, 35782912, 0, 8640, 73, 0, 8640, 73, 0, 8640, 73, 0, 12544, 65536, 0, 14400, 613566756, 0, 14400, 613566756, 0, 14400, 613566756, 0, 14400, 613566756, 0, 14400, 613566756, 0, 14400, 613566756, 0, 14400, 613566756, 0, 14400, 613566756, 0, 14400, 613566756, 0, 14400, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386300548146205_541_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386300548146205_541_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1b80c0de --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386300548146205_541_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,195 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 12))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((134 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((148 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((157 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((178 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1164 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 
0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 5840, 1342177301, 0, 5840, 1342177301, 0, 5840, 1342177301, 0, 5840, 1342177301, 0, 5840, 1342177301, 0, 5856, 1342177301, 0, 5856, 1342177301, 0, 5856, 1342177301, 0, 5856, 1342177301, 0, 5856, 1342177301, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 8576, 2684354560, 0, 8576, 2684354560, 0, 8580, 2684354560, 0, 8580, 2684354560, 0, 8592, 2684354560, 0, 8592, 2684354560, 0, 8596, 2684354560, 0, 8596, 2684354560, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 
9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 
10068, 2863311530, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 4224, 1431655765, 0, 5840, 1342177301, 0, 5840, 1342177301, 0, 5840, 1342177301, 0, 5840, 1342177301, 0, 5840, 1342177301, 0, 5856, 1342177301, 0, 5856, 1342177301, 0, 5856, 1342177301, 0, 5856, 1342177301, 0, 5856, 1342177301, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7616, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 7632, 2863311530, 0, 8576, 2684354560, 0, 8576, 2684354560, 0, 8580, 2684354560, 0, 8580, 2684354560, 0, 8592, 2684354560, 0, 8592, 2684354560, 0, 8596, 2684354560, 0, 8596, 2684354560, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9472, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 
9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9476, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9488, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 9492, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10048, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10052, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10064, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 
2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0, 10068, 2863311530, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386435339607224_543_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386435339607224_543_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..13fbfe9e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386435339607224_543_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,148 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((79 << 6) | (i0 << 4)) | (i1 << 2)) | i2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((88 << 6) | (i0 << 4)) | (i1 << 2)) | i2); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((98 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1050 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 
8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 5056, 4, 0, 5057, 4, 0, 5060, 4, 0, 5061, 4, 0, 5064, 4, 0, 5065, 4, 0, 5072, 4, 0, 5073, 4, 0, 5076, 4, 0, 5077, 4, 0, 5080, 4, 0, 5081, 4, 0, 5088, 4, 0, 5089, 4, 0, 5092, 4, 0, 5093, 4, 0, 5096, 4, 0, 5097, 4, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2240, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2256, 
4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2256, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 2272, 4293918720, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3328, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3332, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3336, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3344, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3348, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3352, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3360, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3364, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 
0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 3368, 8191, 0, 5056, 4, 0, 5057, 4, 0, 5060, 4, 0, 5061, 4, 0, 5064, 4, 0, 5065, 4, 0, 5072, 4, 0, 5073, 4, 0, 5076, 4, 0, 5077, 4, 0, 5080, 4, 0, 5081, 4, 0, 5088, 4, 0, 5089, 4, 0, 5092, 4, 0, 5093, 4, 0, 5096, 4, 0, 5097, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386528306614165_546_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386528306614165_546_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..576bda33 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386528306614165_546_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386528492500949_547_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386528492500949_547_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9aed2c45 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386528492500949_547_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,439 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 27))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMax(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 22))) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + 
if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((255 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((265 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((274 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((279 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((286 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = ((295 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 12))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (345 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((360 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (375 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (384 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (389 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (396 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (400 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (409 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 576 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 2147483649, 0, 1088, 2147483649, 0, 2112, 1342177281, 0, 2112, 1342177281, 0, 2112, 1342177281, 0, 2752, 1, 0, 3648, 1073741824, 0, 4928, 1342177281, 0, 4928, 1342177281, 0, 4928, 1342177281, 0, 5632, 2684354560, 0, 5632, 2684354560, 0, 5632, 1342177281, 0, 5632, 1342177281, 0, 5632, 1342177281, 0, 7744, 30, 0, 7744, 30, 0, 7744, 30, 0, 7744, 30, 0, 7760, 30, 0, 7760, 30, 0, 7760, 30, 0, 7760, 30, 0, 8448, 14, 0, 8448, 14, 0, 8448, 
14, 0, 8464, 14, 0, 8464, 14, 0, 8464, 14, 0, 9664, 75497984, 0, 9664, 75497984, 0, 9664, 75497984, 0, 9680, 75497984, 0, 9680, 75497984, 0, 9680, 75497984, 0, 10880, 25182208, 0, 10880, 25182208, 0, 10880, 25182208, 0, 10896, 25182208, 0, 10896, 25182208, 0, 10896, 25182208, 0, 11328, 524288, 0, 11344, 524288, 0, 11968, 73, 0, 11968, 73, 0, 11968, 73, 0, 12544, 272696336, 0, 12544, 272696336, 0, 12544, 272696336, 0, 12544, 272696336, 0, 12544, 272696336, 0, 13760, 603979776, 0, 13760, 603979776, 0, 13776, 603979776, 0, 13776, 603979776, 0, 13792, 603979776, 0, 13792, 603979776, 0, 15440, 2048, 0, 16340, 2048, 0, 16344, 2048, 0, 17556, 4194304, 0, 17560, 4194304, 0, 17876, 2048, 0, 17880, 2048, 0, 18896, 2048, 0, 22080, 4096, 0, 23040, 16777216, 0, 23056, 16777216, 0, 23072, 16777216, 0, 24000, 4096, 0, 24896, 1141130308, 0, 24896, 1141130308, 0, 24896, 1141130308, 0, 24896, 1141130308, 0, 24896, 1141130308, 0, 24896, 1141130308, 0, 24896, 1141130308, 0, 25344, 557192, 0, 25344, 557192, 0, 25344, 557192, 0, 25344, 557192, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 1088, 2147483649, 0, 1088, 2147483649, 0, 2112, 1342177281, 0, 2112, 1342177281, 0, 2112, 1342177281, 0, 2752, 1, 0, 3648, 1073741824, 0, 4928, 1342177281, 0, 4928, 1342177281, 0, 4928, 1342177281, 0, 5632, 2684354560, 0, 5632, 2684354560, 0, 5632, 1342177281, 0, 5632, 1342177281, 0, 5632, 1342177281, 0, 7744, 30, 0, 7744, 30, 0, 7744, 30, 0, 7744, 30, 0, 7760, 30, 0, 7760, 30, 0, 7760, 30, 0, 7760, 30, 0, 8448, 14, 0, 8448, 14, 0, 8448, 14, 0, 8464, 14, 0, 8464, 14, 0, 8464, 14, 0, 9664, 75497984, 0, 9664, 75497984, 0, 9664, 75497984, 0, 9680, 75497984, 0, 9680, 75497984, 0, 9680, 75497984, 0, 10880, 25182208, 0, 10880, 
25182208, 0, 10880, 25182208, 0, 10896, 25182208, 0, 10896, 25182208, 0, 10896, 25182208, 0, 11328, 524288, 0, 11344, 524288, 0, 11968, 73, 0, 11968, 73, 0, 11968, 73, 0, 12544, 272696336, 0, 12544, 272696336, 0, 12544, 272696336, 0, 12544, 272696336, 0, 12544, 272696336, 0, 13760, 603979776, 0, 13760, 603979776, 0, 13776, 603979776, 0, 13776, 603979776, 0, 13792, 603979776, 0, 13792, 603979776, 0, 15440, 2048, 0, 16340, 2048, 0, 16344, 2048, 0, 17556, 4194304, 0, 17560, 4194304, 0, 17876, 2048, 0, 17880, 2048, 0, 18896, 2048, 0, 22080, 4096, 0, 23040, 16777216, 0, 23056, 16777216, 0, 23072, 16777216, 0, 24000, 4096, 0, 24896, 1141130308, 0, 24896, 1141130308, 0, 24896, 1141130308, 0, 24896, 1141130308, 0, 24896, 1141130308, 0, 24896, 1141130308, 0, 24896, 1141130308, 0, 25344, 557192, 0, 25344, 557192, 0, 25344, 557192, 0, 25344, 557192, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0, 26176, 2863309482, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386561949197541_548_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386561949197541_548_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..405a16ba --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386561949197541_548_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,234 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((91 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((106 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((158 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((191 << 6) | (counter2 << 4)) | (i3 << 2)) | counter4); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((200 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 594 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1664, 1048832, 0, 1664, 1048832, 0, 2560, 1145324612, 0, 2560, 1145324612, 0, 2560, 1145324612, 0, 2560, 1145324612, 0, 2560, 1145324612, 0, 2560, 1145324612, 0, 2560, 1145324612, 0, 2560, 1145324612, 0, 3008, 559240, 0, 3008, 559240, 0, 3008, 559240, 0, 3008, 559240, 0, 3008, 559240, 0, 4624, 268435520, 0, 4624, 268435520, 0, 4640, 268435520, 0, 4640, 268435520, 0, 5840, 1073741825, 0, 5840, 1073741825, 0, 5844, 1073741825, 0, 5844, 1073741825, 0, 5856, 1073741825, 0, 5856, 1073741825, 0, 5860, 1073741825, 0, 5860, 1073741825, 0, 6800, 4, 0, 6816, 4, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 
1431655765, 0, 8544, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 10128, 8388644, 0, 10128, 8388644, 0, 10128, 8388644, 0, 10132, 8388644, 0, 10132, 8388644, 0, 10132, 8388644, 0, 10144, 8388644, 0, 10144, 8388644, 0, 10144, 8388644, 0, 10148, 8388644, 0, 10148, 8388644, 0, 10148, 8388644, 0, 10160, 8388644, 0, 10160, 8388644, 0, 10160, 8388644, 0, 10164, 8388644, 0, 10164, 8388644, 0, 10164, 8388644, 0, 768, 1, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1664, 1048832, 0, 1664, 1048832, 0, 2560, 1145324612, 0, 2560, 1145324612, 0, 2560, 1145324612, 0, 2560, 1145324612, 0, 2560, 1145324612, 0, 2560, 1145324612, 0, 2560, 1145324612, 0, 2560, 1145324612, 0, 3008, 559240, 0, 3008, 559240, 0, 3008, 559240, 0, 3008, 559240, 0, 3008, 559240, 0, 4624, 268435520, 0, 4624, 268435520, 0, 4640, 268435520, 0, 4640, 268435520, 0, 5840, 1073741825, 0, 5840, 1073741825, 0, 5844, 1073741825, 0, 5844, 1073741825, 0, 5856, 1073741825, 0, 5856, 1073741825, 0, 5860, 1073741825, 0, 5860, 1073741825, 0, 6800, 4, 0, 6816, 4, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8528, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 8544, 1431655765, 0, 
8544, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 8560, 1431655765, 0, 10128, 8388644, 0, 10128, 8388644, 0, 10128, 8388644, 0, 10132, 8388644, 0, 10132, 8388644, 0, 10132, 8388644, 0, 10144, 8388644, 0, 10144, 8388644, 0, 10144, 8388644, 0, 10148, 8388644, 0, 10148, 8388644, 0, 10148, 8388644, 0, 10160, 8388644, 0, 10160, 8388644, 0, 10160, 8388644, 0, 10164, 8388644, 0, 10164, 8388644, 0, 10164, 8388644, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386692109818097_552_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386692109818097_552_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..858aa334 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386692109818097_552_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,135 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((75 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 630 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2816, 268439552, 0, 2816, 268439552, 0, 2820, 268439552, 0, 2820, 268439552, 0, 2832, 268439552, 0, 2832, 268439552, 0, 2836, 268439552, 0, 2836, 268439552, 0, 2848, 268439552, 0, 2848, 268439552, 0, 2852, 268439552, 0, 2852, 268439552, 0, 
4096, 1073742165, 0, 4096, 1073742165, 0, 4096, 1073742165, 0, 4096, 1073742165, 0, 4096, 1073742165, 0, 4096, 1073742165, 0, 4100, 1073742165, 0, 4100, 1073742165, 0, 4100, 1073742165, 0, 4100, 1073742165, 0, 4100, 1073742165, 0, 4100, 1073742165, 0, 4112, 1073742165, 0, 4112, 1073742165, 0, 4112, 1073742165, 0, 4112, 1073742165, 0, 4112, 1073742165, 0, 4112, 1073742165, 0, 4116, 1073742165, 0, 4116, 1073742165, 0, 4116, 1073742165, 0, 4116, 1073742165, 0, 4116, 1073742165, 0, 4116, 1073742165, 0, 4128, 1073742165, 0, 4128, 1073742165, 0, 4128, 1073742165, 0, 4128, 1073742165, 0, 4128, 1073742165, 0, 4128, 1073742165, 0, 4132, 1073742165, 0, 4132, 1073742165, 0, 4132, 1073742165, 0, 4132, 1073742165, 0, 4132, 1073742165, 0, 4132, 1073742165, 0, 4800, 1426063701, 0, 4800, 1426063701, 0, 4800, 1426063701, 0, 4800, 1426063701, 0, 4800, 1426063701, 0, 4800, 1426063701, 0, 4800, 1426063701, 0, 4800, 1426063701, 0, 4800, 1426063701, 0, 4804, 1426063701, 0, 4804, 1426063701, 0, 4804, 1426063701, 0, 4804, 1426063701, 0, 4804, 1426063701, 0, 4804, 1426063701, 0, 4804, 1426063701, 0, 4804, 1426063701, 0, 4804, 1426063701, 0, 4816, 1426063701, 0, 4816, 1426063701, 0, 4816, 1426063701, 0, 4816, 1426063701, 0, 4816, 1426063701, 0, 4816, 1426063701, 0, 4816, 1426063701, 0, 4816, 1426063701, 0, 4816, 1426063701, 0, 4820, 1426063701, 0, 4820, 1426063701, 0, 4820, 1426063701, 0, 4820, 1426063701, 0, 4820, 1426063701, 0, 4820, 1426063701, 0, 4820, 1426063701, 0, 4820, 1426063701, 0, 4820, 1426063701, 0, 4832, 1426063701, 0, 4832, 1426063701, 0, 4832, 1426063701, 0, 4832, 1426063701, 0, 4832, 1426063701, 0, 4832, 1426063701, 0, 4832, 1426063701, 0, 4832, 1426063701, 0, 4832, 1426063701, 0, 4836, 1426063701, 0, 4836, 1426063701, 0, 4836, 1426063701, 0, 4836, 1426063701, 0, 4836, 1426063701, 0, 4836, 1426063701, 0, 4836, 1426063701, 0, 4836, 1426063701, 0, 4836, 1426063701, 0, 5248, 1024, 0, 5264, 1024, 0, 5280, 1024, 0, 2816, 268439552, 0, 2816, 268439552, 0, 2820, 268439552, 0, 
2820, 268439552, 0, 2832, 268439552, 0, 2832, 268439552, 0, 2836, 268439552, 0, 2836, 268439552, 0, 2848, 268439552, 0, 2848, 268439552, 0, 2852, 268439552, 0, 2852, 268439552, 0, 4096, 1073742165, 0, 4096, 1073742165, 0, 4096, 1073742165, 0, 4096, 1073742165, 0, 4096, 1073742165, 0, 4096, 1073742165, 0, 4100, 1073742165, 0, 4100, 1073742165, 0, 4100, 1073742165, 0, 4100, 1073742165, 0, 4100, 1073742165, 0, 4100, 1073742165, 0, 4112, 1073742165, 0, 4112, 1073742165, 0, 4112, 1073742165, 0, 4112, 1073742165, 0, 4112, 1073742165, 0, 4112, 1073742165, 0, 4116, 1073742165, 0, 4116, 1073742165, 0, 4116, 1073742165, 0, 4116, 1073742165, 0, 4116, 1073742165, 0, 4116, 1073742165, 0, 4128, 1073742165, 0, 4128, 1073742165, 0, 4128, 1073742165, 0, 4128, 1073742165, 0, 4128, 1073742165, 0, 4128, 1073742165, 0, 4132, 1073742165, 0, 4132, 1073742165, 0, 4132, 1073742165, 0, 4132, 1073742165, 0, 4132, 1073742165, 0, 4132, 1073742165, 0, 4800, 1426063701, 0, 4800, 1426063701, 0, 4800, 1426063701, 0, 4800, 1426063701, 0, 4800, 1426063701, 0, 4800, 1426063701, 0, 4800, 1426063701, 0, 4800, 1426063701, 0, 4800, 1426063701, 0, 4804, 1426063701, 0, 4804, 1426063701, 0, 4804, 1426063701, 0, 4804, 1426063701, 0, 4804, 1426063701, 0, 4804, 1426063701, 0, 4804, 1426063701, 0, 4804, 1426063701, 0, 4804, 1426063701, 0, 4816, 1426063701, 0, 4816, 1426063701, 0, 4816, 1426063701, 0, 4816, 1426063701, 0, 4816, 1426063701, 0, 4816, 1426063701, 0, 4816, 1426063701, 0, 4816, 1426063701, 0, 4816, 1426063701, 0, 4820, 1426063701, 0, 4820, 1426063701, 0, 4820, 1426063701, 0, 4820, 1426063701, 0, 4820, 1426063701, 0, 4820, 1426063701, 0, 4820, 1426063701, 0, 4820, 1426063701, 0, 4820, 1426063701, 0, 4832, 1426063701, 0, 4832, 1426063701, 0, 4832, 1426063701, 0, 4832, 1426063701, 0, 4832, 1426063701, 0, 4832, 1426063701, 0, 4832, 1426063701, 0, 4832, 1426063701, 0, 4832, 1426063701, 0, 4836, 1426063701, 0, 4836, 1426063701, 0, 4836, 1426063701, 0, 4836, 1426063701, 0, 4836, 1426063701, 0, 4836, 
1426063701, 0, 4836, 1426063701, 0, 4836, 1426063701, 0, 4836, 1426063701, 0, 5248, 1024, 0, 5264, 1024, 0, 5280, 1024, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386703649209477_553_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386703649209477_553_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..61421b3c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386703649209477_553_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,312 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 27))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + while 
((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((107 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 24))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || 
(WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((235 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 24))) 
{ + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (266 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((293 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((307 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (328 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2624, 1073741824, 0, 3840, 1073741824, 0, 4864, 1073741824, 0, 6864, 4195328, 0, 6864, 4195328, 0, 6868, 4195328, 0, 6868, 4195328, 0, 6872, 4195328, 0, 6872, 4195328, 0, 6880, 4195328, 0, 6880, 4195328, 0, 6884, 4195328, 0, 6884, 4195328, 0, 6888, 4195328, 0, 6888, 4195328, 0, 12096, 4, 0, 13248, 2147483656, 0, 13248, 2147483656, 0, 14416, 2147483648, 0, 14432, 2147483648, 0, 14448, 2147483648, 0, 16208, 2147483648, 0, 16224, 2147483648, 0, 16240, 2147483648, 0, 17024, 2048, 0, 18752, 32768, 0, 18768, 32768, 0, 19652, 524288, 0, 19656, 524288, 0, 19660, 524288, 0, 19668, 524288, 0, 19672, 524288, 0, 19676, 524288, 0, 576, 17, 0, 576, 17, 0, 2624, 1073741824, 0, 3840, 1073741824, 0, 4864, 1073741824, 0, 6864, 4195328, 0, 6864, 4195328, 0, 6868, 4195328, 0, 6868, 4195328, 0, 6872, 4195328, 0, 6872, 4195328, 0, 6880, 4195328, 0, 6880, 4195328, 0, 6884, 4195328, 0, 6884, 4195328, 0, 6888, 4195328, 0, 6888, 4195328, 0, 12096, 4, 0, 13248, 2147483656, 0, 13248, 2147483656, 0, 14416, 2147483648, 0, 14432, 2147483648, 0, 14448, 2147483648, 0, 16208, 2147483648, 0, 16224, 2147483648, 0, 16240, 2147483648, 0, 17024, 2048, 0, 18752, 32768, 0, 18768, 32768, 0, 19652, 524288, 0, 19656, 524288, 0, 19660, 524288, 0, 19668, 524288, 0, 19672, 524288, 0, 19676, 524288, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + 
Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386759666924874_556_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386759666924874_556_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6709cf9a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386759666924874_556_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,98 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 19))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20)) || 
(WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2896, 2048, 0, 2912, 2048, 0, 2896, 2048, 0, 2912, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386759891122893_557_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386759891122893_557_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..28ac8cd6 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386759891122893_557_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,211 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 17)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 16))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 9600, 1363481681, 0, 
9600, 1363481681, 0, 9600, 1363481681, 0, 9600, 1363481681, 0, 9600, 1363481681, 0, 9600, 1363481681, 0, 9600, 1363481681, 0, 9600, 1363481681, 0, 9600, 1363481681, 0, 9600, 1363481681, 0, 9600, 1363481681, 0, 9920, 613566756, 0, 9920, 613566756, 0, 9920, 613566756, 0, 9920, 613566756, 0, 9920, 613566756, 0, 9920, 613566756, 0, 9920, 613566756, 0, 9920, 613566756, 0, 9920, 613566756, 0, 9920, 613566756, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 2304, 2863311530, 0, 9600, 1363481681, 0, 9600, 1363481681, 0, 9600, 1363481681, 0, 9600, 1363481681, 0, 9600, 1363481681, 0, 9600, 1363481681, 0, 9600, 1363481681, 0, 9600, 1363481681, 0, 9600, 1363481681, 0, 9600, 1363481681, 0, 9600, 1363481681, 0, 9920, 613566756, 0, 9920, 613566756, 0, 9920, 613566756, 0, 9920, 613566756, 0, 9920, 613566756, 0, 9920, 613566756, 0, 9920, 613566756, 0, 9920, 613566756, 0, 9920, 613566756, 0, 9920, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386761713503184_558_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386761713503184_558_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a77f54e5 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386761713503184_558_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3712, 3221225503, 0, 3712, 3221225503, 0, 3712, 3221225503, 0, 3712, 3221225503, 0, 3712, 3221225503, 0, 3712, 3221225503, 0, 3712, 3221225503, 0, 3328, 536872960, 0, 3328, 536872960, 0, 3072, 402653184, 0, 3072, 402653184, 0, 2816, 32, 0, 3712, 3221225503, 0, 3712, 3221225503, 0, 3712, 3221225503, 0, 3712, 3221225503, 0, 3712, 3221225503, 0, 3712, 3221225503, 0, 3712, 3221225503, 0, 3328, 536872960, 0, 3328, 536872960, 0, 3072, 402653184, 0, 3072, 402653184, 0, 2816, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386762148495600_559_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386762148495600_559_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bb7c28b8 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386762148495600_559_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,141 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 22))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 
1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2752, 2181038082, 0, 2752, 2181038082, 0, 2752, 2181038082, 0, 4736, 2147483648, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 2752, 2181038082, 0, 2752, 2181038082, 
0, 2752, 2181038082, 0, 4736, 2147483648, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386762581820388_560_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386762581820388_560_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3e360ebb --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386762581820388_560_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,365 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 1))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 13)) { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 22))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (266 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4288, 2, 0, 8320, 559240, 0, 8320, 559240, 0, 8320, 559240, 0, 8320, 559240, 0, 8320, 559240, 0, 9152, 273, 0, 9152, 273, 0, 9152, 273, 0, 9792, 17, 0, 9792, 17, 0, 11584, 17, 0, 11584, 17, 0, 12160, 285212672, 0, 12160, 285212672, 0, 16192, 536870944, 0, 16192, 536870944, 0, 17024, 536870912, 0, 17920, 1145324612, 0, 17920, 1145324612, 0, 17920, 1145324612, 0, 17920, 1145324612, 0, 17920, 1145324612, 0, 17920, 1145324612, 0, 17920, 1145324612, 0, 17920, 1145324612, 0, 18368, 559240, 0, 18368, 559240, 0, 18368, 559240, 0, 18368, 559240, 0, 18368, 559240, 0, 4288, 2, 0, 8320, 559240, 0, 8320, 559240, 0, 8320, 559240, 0, 8320, 559240, 0, 8320, 559240, 0, 9152, 273, 0, 9152, 273, 0, 9152, 273, 0, 9792, 17, 0, 9792, 17, 0, 11584, 17, 0, 11584, 17, 0, 12160, 285212672, 0, 12160, 285212672, 0, 16192, 536870944, 0, 16192, 536870944, 0, 17024, 536870912, 0, 17920, 1145324612, 0, 17920, 1145324612, 0, 17920, 
1145324612, 0, 17920, 1145324612, 0, 17920, 1145324612, 0, 17920, 1145324612, 0, 17920, 1145324612, 0, 17920, 1145324612, 0, 18368, 559240, 0, 18368, 559240, 0, 18368, 559240, 0, 18368, 559240, 0, 18368, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386769905336497_561_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386769905336497_561_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..047c2ebd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386769905336497_561_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,491 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 27))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint 
counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 28))) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((226 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (282 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 3)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 25))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((333 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((360 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((375 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 2)) { + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((413 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((427 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((442 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((449 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((461 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter5 == 2)) { + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 22)) { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (474 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (487 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (506 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (521 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (530 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((556 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((567 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (574 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (579 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 414 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 2097152, 0, 4304, 134217728, 0, 7616, 2048, 0, 8960, 1073741825, 0, 8960, 1073741825, 0, 15360, 1073741825, 0, 15360, 1073741825, 0, 18624, 272696336, 0, 18624, 272696336, 0, 18624, 272696336, 0, 18624, 272696336, 0, 18624, 272696336, 0, 20288, 4, 0, 21328, 4, 0, 21344, 4, 0, 21360, 4, 0, 23040, 75497472, 0, 23040, 75497472, 0, 23056, 75497472, 0, 23056, 75497472, 0, 23072, 75497472, 0, 23072, 75497472, 0, 24000, 2336, 0, 24000, 2336, 0, 24000, 2336, 0, 24004, 2336, 0, 24004, 2336, 0, 24004, 2336, 0, 24016, 2336, 0, 24016, 2336, 0, 24016, 2336, 0, 24020, 2336, 0, 24020, 2336, 0, 24020, 2336, 0, 24032, 2336, 0, 24032, 2336, 0, 24032, 2336, 0, 24036, 2336, 0, 24036, 2336, 0, 24036, 2336, 0, 26448, 134217728, 0, 26464, 134217728, 0, 27344, 1090785345, 0, 27344, 1090785345, 0, 27344, 1090785345, 0, 27344, 1090785345, 0, 27344, 1090785345, 0, 27344, 1090785345, 0, 27360, 1090785345, 0, 27360, 1090785345, 0, 27360, 1090785345, 0, 27360, 1090785345, 0, 27360, 1090785345, 0, 27360, 1090785345, 0, 28304, 1073741824, 0, 28308, 1073741824, 0, 28320, 1073741824, 0, 28324, 1073741824, 0, 33920, 4194304, 0, 37056, 613566756, 0, 37056, 613566756, 0, 37056, 613566756, 0, 37056, 613566756, 0, 37056, 613566756, 0, 37056, 613566756, 0, 37056, 613566756, 0, 37056, 613566756, 0, 37056, 613566756, 0, 37056, 613566756, 0, 1856, 2097152, 0, 4304, 134217728, 0, 7616, 2048, 0, 8960, 1073741825, 0, 8960, 1073741825, 0, 15360, 1073741825, 0, 
15360, 1073741825, 0, 18624, 272696336, 0, 18624, 272696336, 0, 18624, 272696336, 0, 18624, 272696336, 0, 18624, 272696336, 0, 20288, 4, 0, 21328, 4, 0, 21344, 4, 0, 21360, 4, 0, 23040, 75497472, 0, 23040, 75497472, 0, 23056, 75497472, 0, 23056, 75497472, 0, 23072, 75497472, 0, 23072, 75497472, 0, 24000, 2336, 0, 24000, 2336, 0, 24000, 2336, 0, 24004, 2336, 0, 24004, 2336, 0, 24004, 2336, 0, 24016, 2336, 0, 24016, 2336, 0, 24016, 2336, 0, 24020, 2336, 0, 24020, 2336, 0, 24020, 2336, 0, 24032, 2336, 0, 24032, 2336, 0, 24032, 2336, 0, 24036, 2336, 0, 24036, 2336, 0, 24036, 2336, 0, 26448, 134217728, 0, 26464, 134217728, 0, 27344, 1090785345, 0, 27344, 1090785345, 0, 27344, 1090785345, 0, 27344, 1090785345, 0, 27344, 1090785345, 0, 27344, 1090785345, 0, 27360, 1090785345, 0, 27360, 1090785345, 0, 27360, 1090785345, 0, 27360, 1090785345, 0, 27360, 1090785345, 0, 27360, 1090785345, 0, 28304, 1073741824, 0, 28308, 1073741824, 0, 28320, 1073741824, 0, 28324, 1073741824, 0, 33920, 4194304, 0, 37056, 613566756, 0, 37056, 613566756, 0, 37056, 613566756, 0, 37056, 613566756, 0, 37056, 613566756, 0, 37056, 613566756, 0, 37056, 613566756, 0, 37056, 613566756, 0, 37056, 613566756, 0, 37056, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386829304716907_562_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386829304716907_562_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..772519f8 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386829304716907_562_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,250 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 17)) { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 11)) { + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 26))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() 
== 12) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 22)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 252 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 
4293918720, 0, 2304, 34603008, 0, 2304, 34603008, 0, 2320, 34603008, 0, 2320, 34603008, 0, 4608, 2147483648, 0, 4624, 2147483648, 0, 10880, 3, 0, 10880, 3, 0, 13200, 167773184, 0, 13200, 167773184, 0, 13200, 167773184, 0, 13216, 167773184, 0, 13216, 167773184, 0, 13216, 167773184, 0, 13232, 167773184, 0, 13232, 167773184, 0, 13232, 167773184, 0, 14672, 65536, 0, 14688, 65536, 0, 14704, 65536, 0, 17280, 613566756, 0, 17280, 613566756, 0, 17280, 613566756, 0, 17280, 613566756, 0, 17280, 613566756, 0, 17280, 613566756, 0, 17280, 613566756, 0, 17280, 613566756, 0, 17280, 613566756, 0, 17280, 613566756, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 576, 4293918720, 0, 2304, 34603008, 0, 2304, 34603008, 0, 2320, 34603008, 0, 2320, 34603008, 0, 4608, 2147483648, 0, 4624, 2147483648, 0, 10880, 3, 0, 10880, 3, 0, 13200, 167773184, 0, 13200, 167773184, 0, 13200, 167773184, 0, 13216, 167773184, 0, 13216, 167773184, 0, 13216, 167773184, 0, 13232, 167773184, 0, 13232, 167773184, 0, 13232, 167773184, 0, 14672, 65536, 0, 14688, 65536, 0, 14704, 65536, 0, 17280, 613566756, 0, 17280, 613566756, 0, 17280, 613566756, 0, 17280, 613566756, 0, 17280, 613566756, 0, 17280, 613566756, 0, 17280, 613566756, 0, 17280, 613566756, 0, 17280, 613566756, 0, 17280, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386862909105619_563_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386862909105619_563_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..025a28bc --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386862909105619_563_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,221 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((23 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((30 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20))) { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: 
{ + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
} + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 390 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1940, 4096, 0, 1944, 4096, 0, 1956, 4096, 0, 1960, 4096, 0, 1972, 4096, 0, 1976, 4096, 0, 2496, 272696336, 0, 2496, 272696336, 0, 2496, 272696336, 0, 2496, 272696336, 0, 2496, 272696336, 0, 2816, 613566756, 0, 2816, 613566756, 0, 2816, 613566756, 0, 2816, 613566756, 0, 2816, 613566756, 0, 2816, 613566756, 0, 2816, 613566756, 0, 2816, 613566756, 0, 2816, 613566756, 0, 2816, 613566756, 0, 3920, 21, 0, 3920, 21, 0, 3920, 21, 0, 3936, 21, 0, 3936, 21, 0, 3936, 21, 0, 5328, 16384, 0, 5344, 16384, 0, 6352, 5461, 0, 6352, 5461, 0, 6352, 5461, 0, 6352, 5461, 0, 6352, 5461, 0, 6352, 5461, 0, 6352, 5461, 0, 6368, 5461, 0, 6368, 5461, 0, 6368, 5461, 0, 6368, 5461, 0, 6368, 5461, 0, 6368, 5461, 0, 6368, 5461, 0, 9856, 4286578688, 0, 9856, 4286578688, 0, 9856, 4286578688, 0, 9856, 4286578688, 0, 9856, 4286578688, 0, 9856, 4286578688, 0, 9856, 4286578688, 0, 9856, 4286578688, 0, 9856, 4286578688, 0, 9472, 2048, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 1940, 4096, 0, 1944, 4096, 0, 1956, 4096, 0, 1960, 4096, 0, 1972, 4096, 0, 1976, 4096, 0, 2496, 272696336, 0, 2496, 272696336, 0, 2496, 272696336, 0, 2496, 272696336, 0, 2496, 272696336, 0, 2816, 613566756, 0, 2816, 613566756, 0, 2816, 613566756, 0, 2816, 613566756, 0, 2816, 613566756, 0, 2816, 613566756, 0, 2816, 613566756, 0, 2816, 613566756, 0, 2816, 613566756, 0, 2816, 613566756, 0, 3920, 21, 0, 3920, 21, 0, 3920, 21, 0, 3936, 21, 0, 3936, 21, 0, 3936, 21, 0, 5328, 16384, 0, 5344, 16384, 0, 6352, 5461, 0, 6352, 5461, 0, 6352, 5461, 0, 6352, 5461, 0, 6352, 
5461, 0, 6352, 5461, 0, 6352, 5461, 0, 6368, 5461, 0, 6368, 5461, 0, 6368, 5461, 0, 6368, 5461, 0, 6368, 5461, 0, 6368, 5461, 0, 6368, 5461, 0, 9856, 4286578688, 0, 9856, 4286578688, 0, 9856, 4286578688, 0, 9856, 4286578688, 0, 9856, 4286578688, 0, 9856, 4286578688, 0, 9856, 4286578688, 0, 9856, 4286578688, 0, 9856, 4286578688, 0, 9472, 2048, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0, 8704, 5592405, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756386888799245448_564_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756386888799245448_564_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5d221dbe --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756386888799245448_564_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,324 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((73 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch 
((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (counter2 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 23)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 28))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 2))) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1158 
+ - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1984, 2, 0, 1988, 2, 0, 1992, 2, 0, 2000, 2, 0, 2004, 2, 0, 2008, 2, 0, 2624, 17, 0, 2624, 17, 0, 2628, 17, 0, 2628, 17, 0, 2632, 17, 0, 2632, 17, 0, 2640, 17, 0, 2640, 17, 0, 2644, 17, 0, 2644, 17, 0, 2648, 17, 0, 2648, 17, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3524, 1145324612, 0, 3524, 1145324612, 0, 3524, 1145324612, 0, 3524, 1145324612, 0, 3524, 1145324612, 0, 3524, 1145324612, 0, 3524, 1145324612, 0, 3524, 1145324612, 0, 3528, 1145324612, 0, 3528, 1145324612, 0, 3528, 1145324612, 0, 3528, 1145324612, 0, 3528, 1145324612, 0, 3528, 1145324612, 0, 3528, 1145324612, 0, 3528, 1145324612, 0, 3536, 1145324612, 0, 3536, 1145324612, 0, 3536, 1145324612, 0, 3536, 1145324612, 0, 3536, 1145324612, 0, 3536, 1145324612, 0, 3536, 1145324612, 0, 3536, 1145324612, 0, 3540, 1145324612, 0, 3540, 1145324612, 0, 3540, 1145324612, 0, 3540, 1145324612, 0, 3540, 1145324612, 0, 3540, 1145324612, 0, 3540, 1145324612, 0, 3540, 1145324612, 0, 3544, 1145324612, 0, 3544, 1145324612, 0, 3544, 1145324612, 0, 3544, 1145324612, 0, 3544, 1145324612, 0, 3544, 1145324612, 0, 3544, 1145324612, 0, 3544, 1145324612, 0, 3968, 559240, 0, 
3968, 559240, 0, 3968, 559240, 0, 3968, 559240, 0, 3968, 559240, 0, 3972, 559240, 0, 3972, 559240, 0, 3972, 559240, 0, 3972, 559240, 0, 3972, 559240, 0, 3976, 559240, 0, 3976, 559240, 0, 3976, 559240, 0, 3976, 559240, 0, 3976, 559240, 0, 3984, 559240, 0, 3984, 559240, 0, 3984, 559240, 0, 3984, 559240, 0, 3984, 559240, 0, 3988, 559240, 0, 3988, 559240, 0, 3988, 559240, 0, 3988, 559240, 0, 3988, 559240, 0, 3992, 559240, 0, 3992, 559240, 0, 3992, 559240, 0, 3992, 559240, 0, 3992, 559240, 0, 4672, 2, 0, 4676, 2, 0, 4680, 2, 0, 4688, 2, 0, 4692, 2, 0, 4696, 2, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 7376, 4112, 0, 7376, 4112, 0, 7392, 4112, 0, 7392, 4112, 0, 7408, 4112, 0, 7408, 4112, 0, 8528, 268435457, 0, 8528, 268435457, 0, 8544, 268435457, 0, 8544, 268435457, 0, 8560, 268435457, 0, 8560, 268435457, 0, 9232, 268435457, 0, 9232, 268435457, 0, 9248, 268435457, 0, 9248, 268435457, 0, 9264, 268435457, 0, 9264, 268435457, 0, 9936, 16, 0, 9952, 16, 0, 9968, 16, 0, 10640, 16, 0, 10656, 16, 0, 10672, 16, 0, 13056, 64, 0, 16128, 4, 0, 19200, 8, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 
0, 1024, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1984, 2, 0, 1988, 2, 0, 1992, 2, 0, 2000, 2, 0, 2004, 2, 0, 2008, 2, 0, 2624, 17, 0, 2624, 17, 0, 2628, 17, 0, 2628, 17, 0, 2632, 17, 0, 2632, 17, 0, 2640, 17, 0, 2640, 17, 0, 2644, 17, 0, 2644, 17, 0, 2648, 17, 0, 2648, 17, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3520, 1145324612, 0, 3524, 1145324612, 0, 3524, 1145324612, 0, 3524, 1145324612, 0, 3524, 1145324612, 0, 3524, 1145324612, 0, 3524, 1145324612, 0, 3524, 1145324612, 0, 3524, 1145324612, 0, 3528, 1145324612, 0, 3528, 1145324612, 0, 3528, 1145324612, 0, 3528, 1145324612, 0, 3528, 1145324612, 0, 3528, 1145324612, 0, 3528, 1145324612, 0, 3528, 1145324612, 0, 3536, 1145324612, 0, 3536, 1145324612, 0, 3536, 1145324612, 0, 3536, 1145324612, 0, 3536, 1145324612, 0, 3536, 1145324612, 0, 3536, 1145324612, 0, 3536, 1145324612, 0, 3540, 1145324612, 0, 3540, 1145324612, 0, 3540, 1145324612, 0, 3540, 1145324612, 0, 3540, 1145324612, 0, 3540, 1145324612, 0, 3540, 1145324612, 0, 3540, 1145324612, 0, 3544, 1145324612, 0, 3544, 1145324612, 0, 3544, 1145324612, 0, 3544, 1145324612, 0, 3544, 1145324612, 0, 3544, 1145324612, 0, 3544, 1145324612, 0, 3544, 1145324612, 0, 3968, 559240, 0, 3968, 559240, 0, 3968, 559240, 0, 3968, 559240, 0, 3968, 559240, 0, 3972, 559240, 0, 3972, 559240, 0, 3972, 559240, 0, 3972, 559240, 0, 3972, 559240, 0, 3976, 559240, 0, 3976, 559240, 0, 3976, 559240, 0, 3976, 559240, 0, 3976, 559240, 0, 3984, 559240, 0, 3984, 559240, 0, 3984, 559240, 0, 3984, 559240, 0, 3984, 559240, 0, 3988, 559240, 0, 3988, 559240, 0, 3988, 559240, 0, 3988, 
559240, 0, 3988, 559240, 0, 3992, 559240, 0, 3992, 559240, 0, 3992, 559240, 0, 3992, 559240, 0, 3992, 559240, 0, 4672, 2, 0, 4676, 2, 0, 4680, 2, 0, 4688, 2, 0, 4692, 2, 0, 4696, 2, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5248, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 5264, 1431655765, 0, 7376, 4112, 0, 7376, 4112, 0, 7392, 4112, 0, 7392, 4112, 0, 7408, 4112, 0, 7408, 4112, 0, 8528, 268435457, 0, 8528, 268435457, 0, 8544, 268435457, 0, 8544, 268435457, 0, 8560, 268435457, 0, 8560, 268435457, 0, 9232, 268435457, 0, 9232, 268435457, 0, 9248, 268435457, 0, 9248, 268435457, 0, 9264, 268435457, 0, 9264, 268435457, 0, 9936, 16, 0, 9952, 16, 0, 9968, 16, 0, 10640, 16, 0, 10656, 16, 0, 10672, 16, 0, 13056, 64, 0, 16128, 4, 0, 19200, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756387117042303769_566_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756387117042303769_566_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..509712dc --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756387117042303769_566_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,201 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 10))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + 
Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2368, 65536, 0, 3600, 65536, 0, 3616, 65536, 0, 9408, 1024, 0, 13312, 128, 0, 2368, 65536, 0, 3600, 65536, 0, 3616, 65536, 0, 9408, 1024, 0, 13312, 128, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756387117372135024_567_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756387117372135024_567_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1f5588ca --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756387117372135024_567_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - 
Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756387117654570315_568_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756387117654570315_568_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ed7a5055 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756387117654570315_568_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,143 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 
2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 26))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((118 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((127 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((149 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 576 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 85, 0, 1088, 85, 0, 1088, 85, 0, 1088, 85, 0, 1104, 85, 0, 1104, 85, 0, 1104, 85, 0, 1104, 85, 0, 1536, 1365, 0, 1536, 1365, 0, 1536, 1365, 0, 1536, 1365, 0, 1536, 1365, 0, 1536, 1365, 0, 1552, 1365, 0, 1552, 1365, 0, 1552, 1365, 0, 1552, 1365, 0, 1552, 1365, 0, 1552, 1365, 0, 3648, 524288, 0, 3664, 524288, 0, 6592, 524288, 0, 6596, 524288, 0, 6608, 524288, 0, 6612, 524288, 0, 7552, 134217728, 0, 7556, 134217728, 0, 7568, 134217728, 0, 7572, 134217728, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 
8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 9536, 512, 0, 9552, 512, 0, 1088, 85, 0, 1088, 85, 0, 1088, 85, 0, 1088, 85, 0, 1104, 85, 0, 1104, 85, 0, 1104, 85, 0, 1104, 85, 0, 1536, 1365, 0, 1536, 1365, 0, 1536, 1365, 0, 1536, 1365, 0, 1536, 1365, 0, 1536, 1365, 0, 1552, 1365, 0, 1552, 1365, 0, 1552, 1365, 0, 1552, 1365, 0, 1552, 1365, 0, 1552, 1365, 0, 3648, 524288, 0, 3664, 524288, 0, 6592, 524288, 0, 6596, 524288, 0, 6608, 524288, 0, 6612, 524288, 0, 7552, 134217728, 0, 7556, 134217728, 0, 7568, 134217728, 0, 7572, 134217728, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8128, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8132, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8144, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 
2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 8148, 2863311530, 0, 9536, 512, 0, 9552, 512, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756387409818781013_570_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756387409818781013_570_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..71998bda --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756387409818781013_570_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,256 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 
8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 18)) { + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || 
(WaveGetLaneIndex() >= 26))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: expected_bit_patterns + Format: UInt32 + 
Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 6208, 4, 0, 7744, 613566500, 0, 7744, 613566500, 0, 7744, 613566500, 0, 7744, 613566500, 0, 7744, 613566500, 0, 7744, 613566500, 0, 7744, 613566500, 0, 7744, 613566500, 0, 7744, 613566500, 0, 8704, 603979780, 0, 8704, 603979780, 0, 8704, 603979780, 0, 9472, 4290772992, 0, 9472, 4290772992, 0, 9472, 4290772992, 0, 9472, 4290772992, 0, 9472, 4290772992, 0, 9472, 4290772992, 0, 9472, 4290772992, 0, 9472, 4290772992, 0, 9472, 4290772992, 0, 9472, 4290772992, 0, 10624, 3758096384, 0, 10624, 3758096384, 0, 10624, 3758096384, 0, 12160, 1140850688, 0, 12160, 1140850688, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 6208, 4, 0, 7744, 613566500, 0, 7744, 613566500, 0, 7744, 613566500, 0, 7744, 613566500, 0, 7744, 613566500, 0, 7744, 613566500, 0, 7744, 613566500, 0, 7744, 613566500, 0, 7744, 613566500, 0, 8704, 603979780, 0, 8704, 603979780, 0, 8704, 603979780, 0, 9472, 4290772992, 0, 9472, 4290772992, 0, 9472, 4290772992, 0, 9472, 4290772992, 0, 9472, 4290772992, 0, 9472, 4290772992, 0, 9472, 4290772992, 0, 9472, 4290772992, 0, 9472, 4290772992, 0, 9472, 4290772992, 0, 10624, 3758096384, 0, 10624, 3758096384, 0, 10624, 3758096384, 0, 12160, 1140850688, 0, 12160, 1140850688, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756387410872867064_571_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756387410872867064_571_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..998350c2 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756387410872867064_571_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,130 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 16)) { + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 19)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1936, 67109120, 0, 1936, 67109120, 0, 1952, 67109120, 0, 1952, 67109120, 0, 1968, 67109120, 0, 1968, 67109120, 0, 4688, 131074, 0, 4688, 131074, 0, 4704, 131074, 0, 4704, 131074, 0, 4720, 131074, 0, 4720, 131074, 0, 5376, 2, 0, 1936, 67109120, 0, 1936, 67109120, 0, 1952, 67109120, 0, 1952, 67109120, 0, 1968, 
67109120, 0, 1968, 67109120, 0, 4688, 131074, 0, 4688, 131074, 0, 4704, 131074, 0, 4704, 131074, 0, 4720, 131074, 0, 4720, 131074, 0, 5376, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756387493875668261_575_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756387493875668261_575_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cb1d30e7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756387493875668261_575_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,138 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 20)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + 
- Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 8, 0, 1792, 2181570690, 0, 1792, 2181570690, 0, 1792, 2181570690, 0, 1792, 2181570690, 0, 1792, 2181570690, 0, 1792, 2181570690, 0, 896, 8, 0, 1792, 2181570690, 0, 1792, 2181570690, 0, 1792, 2181570690, 0, 1792, 2181570690, 0, 1792, 2181570690, 0, 1792, 2181570690, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756387494187978362_576_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756387494187978362_576_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..11c9b805 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756387494187978362_576_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,274 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 29)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 30)) { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((134 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 22))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 23))) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2240, 35791394, 0, 2240, 35791394, 0, 2240, 35791394, 0, 2240, 35791394, 0, 2240, 35791394, 0, 2240, 35791394, 0, 2240, 35791394, 0, 6144, 35791394, 0, 6144, 35791394, 0, 6144, 35791394, 0, 6144, 35791394, 0, 6144, 35791394, 0, 6144, 35791394, 0, 6144, 35791394, 0, 8592, 64, 0, 8596, 64, 0, 9408, 559240, 0, 9408, 559240, 0, 9408, 559240, 0, 9408, 559240, 0, 9408, 559240, 0, 17664, 4194304, 0, 576, 17, 0, 576, 17, 0, 2240, 35791394, 0, 2240, 35791394, 0, 2240, 35791394, 0, 2240, 35791394, 0, 2240, 35791394, 0, 2240, 35791394, 0, 2240, 35791394, 0, 6144, 35791394, 0, 6144, 35791394, 0, 6144, 35791394, 0, 6144, 35791394, 0, 6144, 35791394, 0, 6144, 35791394, 0, 6144, 35791394, 0, 8592, 64, 0, 8596, 64, 0, 9408, 559240, 0, 9408, 559240, 0, 9408, 559240, 0, 9408, 559240, 0, 9408, 
559240, 0, 17664, 4194304, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756387562307700687_578_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756387562307700687_578_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b60c0fa0 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756387562307700687_578_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,270 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 5))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 28)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27))) { + if 
(((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((155 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 28)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((276 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 600 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5696, 85, 0, 5696, 85, 0, 5696, 85, 0, 5696, 85, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 7936, 268435729, 0, 7936, 268435729, 0, 7936, 268435729, 0, 7936, 268435729, 0, 7952, 268435729, 0, 7952, 268435729, 0, 7952, 268435729, 0, 7952, 268435729, 0, 7968, 268435729, 0, 7968, 268435729, 0, 7968, 268435729, 0, 7968, 268435729, 0, 9920, 1048576, 0, 9924, 1048576, 0, 9936, 1048576, 0, 9940, 1048576, 0, 9952, 1048576, 0, 9956, 1048576, 0, 10624, 268435457, 0, 10624, 268435457, 0, 10640, 268435457, 0, 10640, 268435457, 0, 10656, 268435457, 0, 10656, 268435457, 0, 11776, 536870912, 0, 11792, 536870912, 0, 11808, 536870912, 0, 12800, 35791394, 0, 12800, 35791394, 0, 12800, 35791394, 0, 12800, 35791394, 0, 12800, 35791394, 0, 12800, 35791394, 0, 12800, 35791394, 0, 14416, 35791394, 0, 14416, 35791394, 0, 14416, 35791394, 0, 14416, 35791394, 0, 14416, 35791394, 0, 14416, 35791394, 0, 14416, 35791394, 0, 15168, 
35791394, 0, 15168, 35791394, 0, 15168, 35791394, 0, 15168, 35791394, 0, 15168, 35791394, 0, 15168, 35791394, 0, 15168, 35791394, 0, 15488, 1145324612, 0, 15488, 1145324612, 0, 15488, 1145324612, 0, 15488, 1145324612, 0, 15488, 1145324612, 0, 15488, 1145324612, 0, 15488, 1145324612, 0, 15488, 1145324612, 0, 16448, 2290614272, 0, 16448, 2290614272, 0, 16448, 2290614272, 0, 16448, 2290614272, 0, 16464, 2290614272, 0, 16464, 2290614272, 0, 16464, 2290614272, 0, 16464, 2290614272, 0, 16480, 2290614272, 0, 16480, 2290614272, 0, 16480, 2290614272, 0, 16480, 2290614272, 0, 17664, 2281701376, 0, 17664, 2281701376, 0, 17668, 2281701376, 0, 17668, 2281701376, 0, 17680, 2281701376, 0, 17680, 2281701376, 0, 17684, 2281701376, 0, 17684, 2281701376, 0, 17696, 2281701376, 0, 17696, 2281701376, 0, 17700, 2281701376, 0, 17700, 2281701376, 0, 5696, 85, 0, 5696, 85, 0, 5696, 85, 0, 5696, 85, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 6272, 1431655765, 0, 7936, 268435729, 0, 7936, 268435729, 0, 7936, 268435729, 0, 7936, 268435729, 0, 7952, 268435729, 0, 7952, 268435729, 0, 7952, 268435729, 0, 7952, 268435729, 0, 7968, 268435729, 0, 7968, 268435729, 0, 7968, 268435729, 0, 7968, 268435729, 0, 9920, 1048576, 0, 9924, 1048576, 0, 9936, 1048576, 0, 9940, 1048576, 0, 9952, 1048576, 0, 9956, 1048576, 0, 10624, 268435457, 0, 10624, 268435457, 0, 10640, 268435457, 0, 10640, 268435457, 0, 10656, 268435457, 0, 10656, 268435457, 0, 11776, 536870912, 0, 11792, 536870912, 0, 11808, 536870912, 0, 12800, 35791394, 0, 12800, 35791394, 0, 12800, 35791394, 0, 12800, 35791394, 0, 12800, 35791394, 0, 12800, 35791394, 0, 12800, 35791394, 0, 14416, 35791394, 0, 14416, 35791394, 0, 14416, 35791394, 0, 14416, 35791394, 0, 14416, 35791394, 
0, 14416, 35791394, 0, 14416, 35791394, 0, 15168, 35791394, 0, 15168, 35791394, 0, 15168, 35791394, 0, 15168, 35791394, 0, 15168, 35791394, 0, 15168, 35791394, 0, 15168, 35791394, 0, 15488, 1145324612, 0, 15488, 1145324612, 0, 15488, 1145324612, 0, 15488, 1145324612, 0, 15488, 1145324612, 0, 15488, 1145324612, 0, 15488, 1145324612, 0, 15488, 1145324612, 0, 16448, 2290614272, 0, 16448, 2290614272, 0, 16448, 2290614272, 0, 16448, 2290614272, 0, 16464, 2290614272, 0, 16464, 2290614272, 0, 16464, 2290614272, 0, 16464, 2290614272, 0, 16480, 2290614272, 0, 16480, 2290614272, 0, 16480, 2290614272, 0, 16480, 2290614272, 0, 17664, 2281701376, 0, 17664, 2281701376, 0, 17668, 2281701376, 0, 17668, 2281701376, 0, 17680, 2281701376, 0, 17680, 2281701376, 0, 17684, 2281701376, 0, 17684, 2281701376, 0, 17696, 2281701376, 0, 17696, 2281701376, 0, 17700, 2281701376, 0, 17700, 2281701376, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756387675861388612_580_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756387675861388612_580_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..971f5f69 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756387675861388612_580_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,352 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if 
(((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 30)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 27))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 22))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if 
(((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 24))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + 
- Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 360 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 2560, 1, 0, 3136, 268501008, 0, 3136, 268501008, 0, 3136, 268501008, 0, 6160, 2, 0, 6176, 2, 0, 6192, 2, 0, 6864, 2, 0, 6880, 2, 0, 6896, 2, 0, 10048, 559240, 0, 10048, 559240, 0, 10048, 559240, 0, 10048, 559240, 0, 10048, 559240, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 12544, 268435473, 0, 12544, 268435473, 0, 12544, 268435473, 0, 17088, 136, 0, 17088, 136, 0, 18176, 4160749583, 0, 18176, 4160749583, 0, 18176, 4160749583, 0, 18176, 4160749583, 0, 18176, 4160749583, 0, 18176, 4160749583, 0, 18176, 4160749583, 0, 18176, 4160749583, 0, 18176, 4160749583, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 2560, 1, 0, 3136, 268501008, 0, 3136, 268501008, 0, 3136, 268501008, 0, 6160, 2, 0, 6176, 2, 0, 6192, 2, 0, 6864, 2, 0, 6880, 2, 0, 6896, 2, 0, 10048, 559240, 0, 10048, 559240, 0, 10048, 559240, 0, 10048, 559240, 0, 10048, 559240, 0, 11200, 4160749823, 0, 11200, 
4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 11200, 4160749823, 0, 12544, 268435473, 0, 12544, 268435473, 0, 12544, 268435473, 0, 17088, 136, 0, 17088, 136, 0, 18176, 4160749583, 0, 18176, 4160749583, 0, 18176, 4160749583, 0, 18176, 4160749583, 0, 18176, 4160749583, 0, 18176, 4160749583, 0, 18176, 4160749583, 0, 18176, 4160749583, 0, 18176, 4160749583, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756387680416384190_581_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756387680416384190_581_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..62a29ee7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756387680416384190_581_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,102 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2880, 16785426, 0, 2880, 16785426, 0, 2880, 16785426, 0, 2880, 16785426, 0, 2496, 4194816, 0, 2496, 4194816, 0, 3520, 85, 0, 3520, 85, 0, 3520, 85, 0, 3520, 85, 0, 2880, 16785426, 0, 2880, 16785426, 0, 2880, 16785426, 0, 2880, 16785426, 0, 2496, 4194816, 0, 2496, 4194816, 0, 3520, 85, 0, 3520, 85, 0, 3520, 85, 0, 3520, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756387680744944035_582_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756387680744944035_582_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..54afcb21 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756387680744944035_582_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,148 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - 
Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756387680996367185_583_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756387680996367185_583_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..97044e5a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756387680996367185_583_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,104 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1988, 4227136, 0, 1988, 4227136, 0, 1988, 4227136, 0, 1992, 4227136, 0, 1992, 4227136, 0, 1992, 4227136, 0, 2004, 4227136, 0, 2004, 4227136, 0, 2004, 4227136, 0, 2008, 4227136, 0, 2008, 4227136, 0, 2008, 4227136, 0, 4416, 268452352, 0, 4416, 268452352, 0, 4416, 268452352, 0, 4432, 268452352, 0, 4432, 268452352, 0, 4432, 268452352, 0, 1988, 4227136, 0, 1988, 4227136, 0, 1988, 4227136, 0, 1992, 4227136, 0, 1992, 4227136, 0, 1992, 4227136, 0, 2004, 4227136, 0, 2004, 4227136, 0, 2004, 4227136, 0, 2008, 4227136, 0, 2008, 4227136, 0, 2008, 4227136, 0, 4416, 268452352, 0, 4416, 268452352, 0, 4416, 268452352, 0, 4432, 
268452352, 0, 4432, 268452352, 0, 4432, 268452352, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756387736123726340_585_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756387736123726340_585_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e97e3b4c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756387736123726340_585_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,297 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 16))) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 27))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 
30))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= ((274 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((289 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (307 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (321 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - 
Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 432 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1728, 2684354602, 0, 1728, 2684354602, 0, 1728, 2684354602, 0, 1728, 2684354602, 0, 1728, 2684354602, 0, 2768, 2684354602, 0, 2768, 2684354602, 0, 2768, 2684354602, 0, 2768, 2684354602, 0, 2768, 2684354602, 0, 2784, 2684354602, 0, 2784, 2684354602, 0, 2784, 2684354602, 0, 2784, 2684354602, 0, 2784, 2684354602, 0, 4032, 2684354562, 0, 4032, 2684354562, 0, 4032, 2684354562, 0, 4992, 8388608, 0, 9600, 524288, 0, 11136, 1, 0, 12432, 1, 0, 12448, 1, 0, 12464, 1, 0, 13136, 1, 0, 13152, 1, 0, 13168, 1, 0, 13952, 1, 0, 14400, 73, 0, 14400, 73, 0, 14400, 73, 0, 14976, 136348160, 0, 14976, 136348160, 0, 14976, 136348160, 0, 14976, 136348160, 0, 15872, 136348160, 0, 15872, 136348160, 0, 15872, 136348160, 0, 15872, 136348160, 0, 19072, 136348160, 0, 19072, 136348160, 0, 19072, 136348160, 0, 19072, 136348160, 0, 19648, 1090785280, 0, 19648, 1090785280, 0, 19648, 1090785280, 0, 19648, 1090785280, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20544, 613566756, 0, 20544, 613566756, 0, 20544, 613566756, 0, 20544, 613566756, 0, 20544, 613566756, 0, 20544, 613566756, 0, 20544, 613566756, 0, 20544, 613566756, 0, 20544, 613566756, 0, 20544, 613566756, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1728, 2684354602, 0, 1728, 2684354602, 0, 1728, 2684354602, 0, 1728, 2684354602, 0, 1728, 2684354602, 0, 2768, 2684354602, 0, 2768, 2684354602, 0, 2768, 2684354602, 0, 2768, 2684354602, 0, 2768, 2684354602, 0, 2784, 2684354602, 0, 2784, 2684354602, 0, 2784, 2684354602, 0, 2784, 2684354602, 0, 2784, 2684354602, 0, 4032, 2684354562, 0, 4032, 2684354562, 0, 4032, 2684354562, 0, 4992, 8388608, 0, 9600, 
524288, 0, 11136, 1, 0, 12432, 1, 0, 12448, 1, 0, 12464, 1, 0, 13136, 1, 0, 13152, 1, 0, 13168, 1, 0, 13952, 1, 0, 14400, 73, 0, 14400, 73, 0, 14400, 73, 0, 14976, 136348160, 0, 14976, 136348160, 0, 14976, 136348160, 0, 14976, 136348160, 0, 15872, 136348160, 0, 15872, 136348160, 0, 15872, 136348160, 0, 15872, 136348160, 0, 19072, 136348160, 0, 19072, 136348160, 0, 19072, 136348160, 0, 19072, 136348160, 0, 19648, 1090785280, 0, 19648, 1090785280, 0, 19648, 1090785280, 0, 19648, 1090785280, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20224, 1363481681, 0, 20544, 613566756, 0, 20544, 613566756, 0, 20544, 613566756, 0, 20544, 613566756, 0, 20544, 613566756, 0, 20544, 613566756, 0, 20544, 613566756, 0, 20544, 613566756, 0, 20544, 613566756, 0, 20544, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756387972648932496_587_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756387972648932496_587_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2bda3300 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756387972648932496_587_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,357 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 7))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() 
< 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((284 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 306 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 3904, 537002016, 0, 3904, 537002016, 0, 3904, 537002016, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 5312, 17, 0, 5312, 17, 0, 10576, 4, 0, 10592, 4, 0, 14528, 128, 0, 16192, 2290614272, 0, 16192, 2290614272, 0, 16192, 2290614272, 0, 16192, 2290614272, 0, 18176, 32768, 0, 18192, 32768, 0, 18944, 2290614272, 0, 18944, 2290614272, 0, 18944, 2290614272, 0, 18944, 2290614272, 0, 576, 17, 0, 576, 17, 0, 3904, 537002016, 0, 3904, 537002016, 0, 3904, 537002016, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4224, 1717986918, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 4672, 978670, 0, 5312, 17, 0, 5312, 17, 0, 10576, 4, 0, 10592, 4, 0, 14528, 128, 0, 16192, 2290614272, 0, 16192, 2290614272, 0, 16192, 2290614272, 0, 16192, 2290614272, 0, 
18176, 32768, 0, 18192, 32768, 0, 18944, 2290614272, 0, 18944, 2290614272, 0, 18944, 2290614272, 0, 18944, 2290614272, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756388039896250240_589_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756388039896250240_589_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..40cb9497 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756388039896250240_589_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,230 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 588 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1488, 1, 0, 1504, 1, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 5120, 268501008, 0, 5120, 268501008, 0, 5120, 268501008, 0, 5440, 1048832, 0, 5440, 1048832, 0, 7184, 1610612838, 0, 7184, 1610612838, 0, 7184, 1610612838, 0, 7184, 1610612838, 0, 7184, 1610612838, 0, 7184, 1610612838, 0, 7200, 1610612838, 0, 7200, 1610612838, 0, 7200, 1610612838, 0, 7200, 1610612838, 0, 7200, 1610612838, 0, 7200, 1610612838, 0, 7216, 1610612838, 0, 7216, 1610612838, 0, 7216, 1610612838, 0, 7216, 1610612838, 0, 7216, 1610612838, 0, 7216, 1610612838, 0, 7888, 1711276646, 0, 7888, 1711276646, 0, 7888, 1711276646, 0, 7888, 1711276646, 0, 7888, 1711276646, 0, 7888, 1711276646, 0, 7888, 1711276646, 0, 7888, 1711276646, 0, 7888, 
1711276646, 0, 7904, 1711276646, 0, 7904, 1711276646, 0, 7904, 1711276646, 0, 7904, 1711276646, 0, 7904, 1711276646, 0, 7904, 1711276646, 0, 7904, 1711276646, 0, 7904, 1711276646, 0, 7904, 1711276646, 0, 7920, 1711276646, 0, 7920, 1711276646, 0, 7920, 1711276646, 0, 7920, 1711276646, 0, 7920, 1711276646, 0, 7920, 1711276646, 0, 7920, 1711276646, 0, 7920, 1711276646, 0, 7920, 1711276646, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 1488, 1, 0, 1504, 1, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2368, 1717986918, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 2816, 978670, 0, 5120, 268501008, 0, 5120, 268501008, 0, 5120, 268501008, 0, 5440, 1048832, 0, 5440, 1048832, 0, 7184, 1610612838, 0, 7184, 1610612838, 0, 7184, 1610612838, 0, 7184, 1610612838, 0, 7184, 1610612838, 0, 7184, 1610612838, 0, 7200, 1610612838, 0, 7200, 1610612838, 0, 7200, 1610612838, 0, 7200, 1610612838, 0, 7200, 1610612838, 0, 7200, 1610612838, 0, 7216, 1610612838, 0, 7216, 1610612838, 0, 7216, 1610612838, 0, 7216, 1610612838, 0, 7216, 1610612838, 0, 7216, 1610612838, 0, 7888, 1711276646, 0, 7888, 1711276646, 0, 7888, 1711276646, 0, 7888, 1711276646, 0, 7888, 1711276646, 0, 7888, 1711276646, 0, 7888, 1711276646, 0, 7888, 1711276646, 0, 7888, 1711276646, 0, 7904, 1711276646, 0, 7904, 1711276646, 0, 7904, 1711276646, 0, 7904, 
1711276646, 0, 7904, 1711276646, 0, 7904, 1711276646, 0, 7904, 1711276646, 0, 7904, 1711276646, 0, 7904, 1711276646, 0, 7920, 1711276646, 0, 7920, 1711276646, 0, 7920, 1711276646, 0, 7920, 1711276646, 0, 7920, 1711276646, 0, 7920, 1711276646, 0, 7920, 1711276646, 0, 7920, 1711276646, 0, 7920, 1711276646, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0, 8320, 978670, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756388045891634479_590_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756388045891634479_590_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7c22ec74 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756388045891634479_590_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,548 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 10)) { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 18)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() 
>= 20)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 25))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 22))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || 
(WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 28)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 18))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (295 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((311 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (332 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() 
< 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (342 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (356 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (363 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (373 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (382 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if 
(true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (387 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (391 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (409 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((423 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((430 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (447 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (457 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (466 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (477 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (517 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (513 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 7)) { + result = 
(result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (509 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (503 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 432 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2048, 9, 0, 2048, 9, 0, 4096, 1226833920, 0, 4096, 1226833920, 0, 4096, 1226833920, 0, 4096, 1226833920, 0, 7360, 272696336, 0, 7360, 272696336, 0, 7360, 272696336, 0, 7360, 272696336, 0, 7360, 272696336, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 
18880, 136314880, 0, 18880, 136314880, 0, 21248, 2181570690, 0, 21248, 2181570690, 0, 21248, 2181570690, 0, 21248, 2181570690, 0, 21248, 2181570690, 0, 21248, 2181570690, 0, 33088, 1118208, 0, 33088, 1118208, 0, 33088, 1118208, 0, 32832, 4, 0, 32576, 128, 0, 32192, 123, 0, 32192, 123, 0, 32192, 123, 0, 32192, 123, 0, 32192, 123, 0, 32192, 123, 0, 2048, 9, 0, 2048, 9, 0, 4096, 1226833920, 0, 4096, 1226833920, 0, 4096, 1226833920, 0, 4096, 1226833920, 0, 7360, 272696336, 0, 7360, 272696336, 0, 7360, 272696336, 0, 7360, 272696336, 0, 7360, 272696336, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9232, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 9248, 1431655765, 0, 18880, 136314880, 0, 18880, 136314880, 0, 21248, 2181570690, 0, 21248, 2181570690, 0, 21248, 2181570690, 0, 21248, 2181570690, 0, 21248, 2181570690, 0, 21248, 2181570690, 0, 33088, 1118208, 0, 33088, 1118208, 0, 33088, 1118208, 0, 32832, 4, 0, 32576, 128, 0, 32192, 123, 0, 32192, 123, 0, 32192, 123, 0, 32192, 123, 0, 32192, 123, 0, 32192, 123, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756388084627382767_591_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756388084627382767_591_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0b2e0d49 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756388084627382767_591_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,186 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() >= 19)) { + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 492 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2304, 572662306, 0, 2304, 572662306, 0, 2304, 572662306, 0, 2304, 572662306, 0, 2304, 572662306, 0, 2304, 572662306, 0, 2304, 572662306, 0, 2304, 572662306, 0, 2308, 572662306, 0, 2308, 572662306, 0, 2308, 572662306, 0, 2308, 572662306, 0, 2308, 572662306, 0, 2308, 572662306, 0, 2308, 572662306, 0, 2308, 572662306, 0, 2320, 572662306, 0, 2320, 572662306, 0, 2320, 
572662306, 0, 2320, 572662306, 0, 2320, 572662306, 0, 2320, 572662306, 0, 2320, 572662306, 0, 2320, 572662306, 0, 2324, 572662306, 0, 2324, 572662306, 0, 2324, 572662306, 0, 2324, 572662306, 0, 2324, 572662306, 0, 2324, 572662306, 0, 2324, 572662306, 0, 2324, 572662306, 0, 2880, 572662306, 0, 2880, 572662306, 0, 2880, 572662306, 0, 2880, 572662306, 0, 2880, 572662306, 0, 2880, 572662306, 0, 2880, 572662306, 0, 2880, 572662306, 0, 2884, 572662306, 0, 2884, 572662306, 0, 2884, 572662306, 0, 2884, 572662306, 0, 2884, 572662306, 0, 2884, 572662306, 0, 2884, 572662306, 0, 2884, 572662306, 0, 2896, 572662306, 0, 2896, 572662306, 0, 2896, 572662306, 0, 2896, 572662306, 0, 2896, 572662306, 0, 2896, 572662306, 0, 2896, 572662306, 0, 2896, 572662306, 0, 2900, 572662306, 0, 2900, 572662306, 0, 2900, 572662306, 0, 2900, 572662306, 0, 2900, 572662306, 0, 2900, 572662306, 0, 2900, 572662306, 0, 2900, 572662306, 0, 3840, 1145324612, 0, 3840, 1145324612, 0, 3840, 1145324612, 0, 3840, 1145324612, 0, 3840, 1145324612, 0, 3840, 1145324612, 0, 3840, 1145324612, 0, 3840, 1145324612, 0, 4480, 2290614272, 0, 4480, 2290614272, 0, 4480, 2290614272, 0, 4480, 2290614272, 0, 6160, 2147483648, 0, 6176, 2147483648, 0, 7808, 8, 0, 9408, 8, 0, 576, 17, 0, 576, 17, 0, 2304, 572662306, 0, 2304, 572662306, 0, 2304, 572662306, 0, 2304, 572662306, 0, 2304, 572662306, 0, 2304, 572662306, 0, 2304, 572662306, 0, 2304, 572662306, 0, 2308, 572662306, 0, 2308, 572662306, 0, 2308, 572662306, 0, 2308, 572662306, 0, 2308, 572662306, 0, 2308, 572662306, 0, 2308, 572662306, 0, 2308, 572662306, 0, 2320, 572662306, 0, 2320, 572662306, 0, 2320, 572662306, 0, 2320, 572662306, 0, 2320, 572662306, 0, 2320, 572662306, 0, 2320, 572662306, 0, 2320, 572662306, 0, 2324, 572662306, 0, 2324, 572662306, 0, 2324, 572662306, 0, 2324, 572662306, 0, 2324, 572662306, 0, 2324, 572662306, 0, 2324, 572662306, 0, 2324, 572662306, 0, 2880, 572662306, 0, 2880, 572662306, 0, 2880, 572662306, 0, 2880, 572662306, 0, 2880, 572662306, 0, 
2880, 572662306, 0, 2880, 572662306, 0, 2880, 572662306, 0, 2884, 572662306, 0, 2884, 572662306, 0, 2884, 572662306, 0, 2884, 572662306, 0, 2884, 572662306, 0, 2884, 572662306, 0, 2884, 572662306, 0, 2884, 572662306, 0, 2896, 572662306, 0, 2896, 572662306, 0, 2896, 572662306, 0, 2896, 572662306, 0, 2896, 572662306, 0, 2896, 572662306, 0, 2896, 572662306, 0, 2896, 572662306, 0, 2900, 572662306, 0, 2900, 572662306, 0, 2900, 572662306, 0, 2900, 572662306, 0, 2900, 572662306, 0, 2900, 572662306, 0, 2900, 572662306, 0, 2900, 572662306, 0, 3840, 1145324612, 0, 3840, 1145324612, 0, 3840, 1145324612, 0, 3840, 1145324612, 0, 3840, 1145324612, 0, 3840, 1145324612, 0, 3840, 1145324612, 0, 3840, 1145324612, 0, 4480, 2290614272, 0, 4480, 2290614272, 0, 4480, 2290614272, 0, 4480, 2290614272, 0, 6160, 2147483648, 0, 6176, 2147483648, 0, 7808, 8, 0, 9408, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756388089999566887_592_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756388089999566887_592_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..74aa1384 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756388089999566887_592_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,171 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 9)) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((107 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24))) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((122 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 474 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2240, 1145324612, 0, 2240, 1145324612, 0, 2240, 1145324612, 0, 2240, 1145324612, 0, 2240, 1145324612, 0, 2240, 1145324612, 0, 2240, 1145324612, 0, 2240, 1145324612, 0, 2256, 1145324612, 0, 2256, 1145324612, 0, 2256, 1145324612, 0, 2256, 1145324612, 0, 2256, 1145324612, 0, 2256, 1145324612, 0, 2256, 1145324612, 0, 2256, 1145324612, 0, 3712, 1145324612, 0, 3712, 1145324612, 0, 3712, 1145324612, 0, 3712, 1145324612, 0, 3712, 1145324612, 0, 3712, 1145324612, 0, 3712, 1145324612, 0, 3712, 1145324612, 0, 3728, 1145324612, 0, 3728, 1145324612, 0, 3728, 1145324612, 0, 3728, 1145324612, 0, 3728, 1145324612, 0, 3728, 1145324612, 0, 3728, 1145324612, 0, 3728, 1145324612, 0, 4160, 559240, 0, 4160, 559240, 0, 4160, 559240, 0, 4160, 559240, 0, 4160, 559240, 0, 5632, 33587264, 0, 5632, 33587264, 0, 5632, 33587264, 0, 5648, 33587264, 0, 5648, 33587264, 0, 5648, 33587264, 0, 6848, 2097280, 0, 6848, 2097280, 0, 6852, 2097280, 0, 6852, 2097280, 0, 6856, 2097280, 0, 6856, 2097280, 0, 6864, 2097280, 0, 6864, 2097280, 0, 
6868, 2097280, 0, 6868, 2097280, 0, 6872, 2097280, 0, 6872, 2097280, 0, 7808, 17301536, 0, 7808, 17301536, 0, 7808, 17301536, 0, 7812, 17301536, 0, 7812, 17301536, 0, 7812, 17301536, 0, 7816, 17301536, 0, 7816, 17301536, 0, 7816, 17301536, 0, 7824, 17301536, 0, 7824, 17301536, 0, 7824, 17301536, 0, 7828, 17301536, 0, 7828, 17301536, 0, 7828, 17301536, 0, 7832, 17301536, 0, 7832, 17301536, 0, 7832, 17301536, 0, 8512, 131076, 0, 8512, 131076, 0, 8528, 131076, 0, 8528, 131076, 0, 576, 17, 0, 576, 17, 0, 2240, 1145324612, 0, 2240, 1145324612, 0, 2240, 1145324612, 0, 2240, 1145324612, 0, 2240, 1145324612, 0, 2240, 1145324612, 0, 2240, 1145324612, 0, 2240, 1145324612, 0, 2256, 1145324612, 0, 2256, 1145324612, 0, 2256, 1145324612, 0, 2256, 1145324612, 0, 2256, 1145324612, 0, 2256, 1145324612, 0, 2256, 1145324612, 0, 2256, 1145324612, 0, 3712, 1145324612, 0, 3712, 1145324612, 0, 3712, 1145324612, 0, 3712, 1145324612, 0, 3712, 1145324612, 0, 3712, 1145324612, 0, 3712, 1145324612, 0, 3712, 1145324612, 0, 3728, 1145324612, 0, 3728, 1145324612, 0, 3728, 1145324612, 0, 3728, 1145324612, 0, 3728, 1145324612, 0, 3728, 1145324612, 0, 3728, 1145324612, 0, 3728, 1145324612, 0, 4160, 559240, 0, 4160, 559240, 0, 4160, 559240, 0, 4160, 559240, 0, 4160, 559240, 0, 5632, 33587264, 0, 5632, 33587264, 0, 5632, 33587264, 0, 5648, 33587264, 0, 5648, 33587264, 0, 5648, 33587264, 0, 6848, 2097280, 0, 6848, 2097280, 0, 6852, 2097280, 0, 6852, 2097280, 0, 6856, 2097280, 0, 6856, 2097280, 0, 6864, 2097280, 0, 6864, 2097280, 0, 6868, 2097280, 0, 6868, 2097280, 0, 6872, 2097280, 0, 6872, 2097280, 0, 7808, 17301536, 0, 7808, 17301536, 0, 7808, 17301536, 0, 7812, 17301536, 0, 7812, 17301536, 0, 7812, 17301536, 0, 7816, 17301536, 0, 7816, 17301536, 0, 7816, 17301536, 0, 7824, 17301536, 0, 7824, 17301536, 0, 7824, 17301536, 0, 7828, 17301536, 0, 7828, 17301536, 0, 7828, 17301536, 0, 7832, 17301536, 0, 7832, 17301536, 0, 7832, 17301536, 0, 8512, 131076, 0, 8512, 131076, 0, 8528, 131076, 0, 8528, 131076, 
0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756388129639022769_593_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756388129639022769_593_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..44bb98a5 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756388129639022769_593_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,173 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 2)) { + break; + } + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 16)) { + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for 
(uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 1)) { + break; + } + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2704, 8192, 0, 2708, 8192, 0, 2712, 8192, 0, 2720, 8192, 0, 2724, 8192, 0, 2728, 8192, 0, 2736, 8192, 0, 2740, 8192, 0, 2744, 8192, 0, 3344, 18, 0, 3344, 18, 0, 3348, 18, 0, 3348, 18, 0, 3352, 18, 0, 3352, 18, 0, 3360, 18, 0, 3360, 18, 0, 3364, 18, 0, 3364, 18, 0, 3368, 18, 0, 3368, 18, 0, 3376, 18, 0, 3376, 18, 0, 3380, 18, 0, 3380, 18, 0, 3384, 18, 0, 3384, 18, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 2704, 8192, 0, 2708, 8192, 0, 2712, 8192, 0, 2720, 8192, 0, 2724, 8192, 0, 2728, 8192, 0, 2736, 8192, 0, 2740, 8192, 0, 2744, 8192, 0, 3344, 18, 0, 3344, 18, 0, 3348, 18, 0, 3348, 18, 0, 3352, 18, 0, 3352, 18, 0, 3360, 18, 0, 3360, 18, 0, 3364, 18, 0, 3364, 18, 0, 3368, 18, 0, 3368, 18, 0, 3376, 18, 0, 3376, 18, 0, 3380, 18, 0, 3380, 18, 0, 3384, 18, 0, 3384, 18, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + 
GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756388136282485478_594_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756388136282485478_594_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87772a00 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756388136282485478_594_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,120 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 20)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) 
|| (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2640, 2050, 0, 2640, 2050, 0, 2656, 2050, 0, 2656, 2050, 0, 3856, 537001984, 0, 3856, 537001984, 0, 3872, 537001984, 0, 3872, 537001984, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2640, 2050, 0, 2640, 2050, 0, 2656, 2050, 0, 2656, 2050, 0, 3856, 537001984, 0, 3856, 537001984, 0, 3872, 537001984, 0, 3872, 537001984, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + 
GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756388137240729123_595_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756388137240729123_595_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..31866bb4 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756388137240729123_595_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,203 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 16))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 73, 0, 1792, 73, 0, 1792, 73, 0, 2368, 272696336, 0, 2368, 272696336, 0, 2368, 272696336, 0, 2368, 272696336, 0, 2368, 
272696336, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 73, 0, 1792, 73, 0, 1792, 73, 0, 2368, 272696336, 0, 2368, 272696336, 0, 2368, 272696336, 0, 2368, 272696336, 0, 2368, 272696336, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756388138187801352_596_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756388138187801352_596_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..22527c2a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756388138187801352_596_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1472, 4286578688, 0, 1472, 4286578688, 0, 1472, 4286578688, 0, 1472, 4286578688, 0, 1472, 4286578688, 0, 1472, 4286578688, 0, 1472, 4286578688, 0, 1472, 4286578688, 0, 1472, 4286578688, 0, 1088, 1052800, 0, 1088, 1052800, 0, 1088, 1052800, 0, 1472, 4286578688, 0, 1472, 4286578688, 0, 1472, 4286578688, 0, 1472, 4286578688, 0, 1472, 4286578688, 0, 1472, 4286578688, 0, 1472, 4286578688, 0, 1472, 4286578688, 0, 1472, 4286578688, 0, 1088, 1052800, 0, 1088, 1052800, 0, 1088, 1052800, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756388138385267965_597_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756388138385267965_597_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..15f7c757 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756388138385267965_597_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,137 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 21))) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 336 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 3136, 2862612522, 0, 3136, 2862612522, 0, 3136, 2862612522, 0, 3136, 2862612522, 0, 3136, 2862612522, 0, 3136, 2862612522, 0, 3136, 2862612522, 0, 3136, 2862612522, 0, 3136, 2862612522, 0, 3152, 2862612522, 0, 3152, 2862612522, 0, 3152, 2862612522, 0, 3152, 2862612522, 0, 3152, 2862612522, 0, 3152, 2862612522, 0, 3152, 2862612522, 0, 3152, 2862612522, 0, 3152, 2862612522, 0, 3168, 2862612522, 0, 3168, 
2862612522, 0, 3168, 2862612522, 0, 3168, 2862612522, 0, 3168, 2862612522, 0, 3168, 2862612522, 0, 3168, 2862612522, 0, 3168, 2862612522, 0, 3168, 2862612522, 0, 4608, 32, 0, 4612, 32, 0, 4616, 32, 0, 4624, 32, 0, 4628, 32, 0, 4632, 32, 0, 4640, 32, 0, 4644, 32, 0, 4648, 32, 0, 5696, 2818572290, 0, 5696, 2818572290, 0, 5696, 2818572290, 0, 5696, 2818572290, 0, 5712, 2818572290, 0, 5712, 2818572290, 0, 5712, 2818572290, 0, 5712, 2818572290, 0, 5728, 2818572290, 0, 5728, 2818572290, 0, 5728, 2818572290, 0, 5728, 2818572290, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 3136, 2862612522, 0, 3136, 2862612522, 0, 3136, 2862612522, 0, 3136, 2862612522, 0, 3136, 2862612522, 0, 3136, 2862612522, 0, 3136, 2862612522, 0, 3136, 2862612522, 0, 3136, 2862612522, 0, 3152, 2862612522, 0, 3152, 2862612522, 0, 3152, 2862612522, 0, 3152, 2862612522, 0, 3152, 2862612522, 0, 3152, 2862612522, 0, 3152, 2862612522, 0, 3152, 2862612522, 0, 3152, 2862612522, 0, 3168, 2862612522, 0, 3168, 2862612522, 0, 3168, 2862612522, 0, 3168, 2862612522, 0, 3168, 2862612522, 0, 3168, 2862612522, 0, 3168, 2862612522, 0, 3168, 2862612522, 0, 3168, 2862612522, 0, 4608, 32, 0, 4612, 32, 0, 4616, 32, 0, 4624, 32, 0, 4628, 32, 0, 4632, 32, 0, 4640, 32, 0, 4644, 32, 0, 4648, 32, 0, 5696, 2818572290, 0, 5696, 2818572290, 0, 5696, 2818572290, 0, 5696, 2818572290, 0, 5712, 2818572290, 0, 5712, 2818572290, 0, 5712, 2818572290, 0, 5712, 2818572290, 0, 5728, 2818572290, 0, 5728, 2818572290, 0, 5728, 2818572290, 0, 5728, 2818572290, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756388146030279710_598_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756388146030279710_598_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..99295e09 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756388146030279710_598_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,202 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 10)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 29)) { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 2)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((129 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 444 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 6208, 131584, 0, 6208, 131584, 0, 6224, 131584, 0, 6224, 131584, 0, 6240, 131584, 0, 6240, 131584, 0, 
7296, 34, 0, 7296, 34, 0, 7312, 34, 0, 7312, 34, 0, 7328, 34, 0, 7328, 34, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 9216, 838860, 0, 9216, 838860, 0, 9216, 838860, 0, 9216, 838860, 0, 9216, 838860, 0, 9216, 838860, 0, 9216, 838860, 0, 9216, 838860, 0, 9216, 838860, 0, 9216, 838860, 0, 10128, 2047, 0, 10128, 2047, 0, 10128, 2047, 0, 10128, 2047, 0, 10128, 2047, 0, 10128, 2047, 0, 10128, 2047, 0, 10128, 2047, 0, 10128, 2047, 0, 10128, 2047, 0, 10128, 2047, 0, 10144, 2047, 0, 10144, 2047, 0, 10144, 2047, 0, 10144, 2047, 0, 10144, 2047, 0, 10144, 2047, 0, 10144, 2047, 0, 10144, 2047, 0, 10144, 2047, 0, 10144, 2047, 0, 10144, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10576, 3758096384, 0, 10576, 3758096384, 0, 10576, 3758096384, 0, 10592, 3758096384, 0, 10592, 3758096384, 0, 10592, 3758096384, 0, 10608, 3758096384, 0, 10608, 3758096384, 0, 10608, 3758096384, 0, 576, 17, 0, 576, 17, 0, 6208, 131584, 0, 6208, 131584, 0, 6224, 131584, 0, 6224, 131584, 0, 6240, 131584, 0, 6240, 131584, 0, 7296, 34, 0, 7296, 34, 0, 7312, 34, 0, 7312, 34, 0, 7328, 34, 0, 7328, 34, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 8768, 1145324612, 0, 9216, 838860, 0, 9216, 838860, 0, 9216, 838860, 0, 9216, 838860, 0, 9216, 838860, 0, 9216, 838860, 0, 9216, 838860, 0, 9216, 838860, 0, 9216, 838860, 0, 9216, 838860, 0, 10128, 2047, 0, 10128, 2047, 0, 10128, 2047, 0, 10128, 2047, 0, 10128, 2047, 0, 10128, 2047, 0, 10128, 2047, 0, 10128, 2047, 0, 10128, 2047, 0, 10128, 2047, 0, 10128, 2047, 0, 10144, 2047, 0, 10144, 2047, 0, 10144, 2047, 0, 10144, 2047, 0, 10144, 2047, 0, 10144, 2047, 0, 10144, 2047, 0, 10144, 2047, 0, 10144, 
2047, 0, 10144, 2047, 0, 10144, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10160, 2047, 0, 10576, 3758096384, 0, 10576, 3758096384, 0, 10576, 3758096384, 0, 10592, 3758096384, 0, 10592, 3758096384, 0, 10592, 3758096384, 0, 10608, 3758096384, 0, 10608, 3758096384, 0, 10608, 3758096384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756388159223811517_599_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756388159223811517_599_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..352bf08d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756388159223811517_599_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,105 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMax(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((32 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4352, 4278190080, 0, 4352, 4278190080, 0, 4352, 4278190080, 0, 4352, 4278190080, 0, 4352, 4278190080, 0, 4352, 4278190080, 0, 4352, 4278190080, 0, 4352, 4278190080, 0, 4352, 4278190080, 0, 4352, 4278190080, 0, 4352, 4278190080, 0, 4352, 4278190080, 0, 4352, 4278190080, 0, 4352, 4278190080, 0, 4352, 4278190080, 0, 4352, 4278190080, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756388172993895773_601_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756388172993895773_601_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9ffa5c56 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756388172993895773_601_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (((40 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 27))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + break; + } + } + if ((counter0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 312 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1616, 268568577, 0, 1616, 268568577, 0, 1616, 268568577, 0, 1616, 268568577, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 3728, 4160749569, 0, 3728, 4160749569, 0, 
3728, 4160749569, 0, 3728, 4160749569, 0, 3728, 4160749569, 0, 3728, 4160749569, 0, 3732, 4160749569, 0, 3732, 4160749569, 0, 3732, 4160749569, 0, 3732, 4160749569, 0, 3732, 4160749569, 0, 3732, 4160749569, 0, 4432, 4160749569, 0, 4432, 4160749569, 0, 4432, 4160749569, 0, 4432, 4160749569, 0, 4432, 4160749569, 0, 4432, 4160749569, 0, 4436, 4160749569, 0, 4436, 4160749569, 0, 4436, 4160749569, 0, 4436, 4160749569, 0, 4436, 4160749569, 0, 4436, 4160749569, 0, 1616, 268568577, 0, 1616, 268568577, 0, 1616, 268568577, 0, 1616, 268568577, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2576, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 2580, 4293918720, 0, 3728, 4160749569, 0, 3728, 4160749569, 0, 3728, 4160749569, 0, 3728, 4160749569, 0, 3728, 4160749569, 0, 3728, 4160749569, 0, 3732, 4160749569, 0, 3732, 4160749569, 0, 3732, 4160749569, 0, 3732, 4160749569, 0, 3732, 4160749569, 0, 3732, 4160749569, 0, 4432, 4160749569, 0, 4432, 4160749569, 0, 4432, 4160749569, 0, 4432, 4160749569, 0, 4432, 4160749569, 0, 4432, 4160749569, 0, 4436, 4160749569, 0, 4436, 4160749569, 0, 4436, 4160749569, 0, 4436, 4160749569, 0, 4436, 4160749569, 0, 4436, 4160749569, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390379912593395_603_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390379912593395_603_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..068d97c1 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390379912593395_603_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,214 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | 
(counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 492 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 5, 0, 1040, 5, 0, 1056, 5, 0, 1056, 5, 0, 1072, 5, 0, 1072, 5, 0, 2768, 65792, 0, 2768, 65792, 0, 2772, 65792, 0, 2772, 65792, 0, 2784, 65792, 0, 2784, 65792, 0, 2788, 65792, 0, 2788, 65792, 0, 2800, 65792, 0, 2800, 65792, 0, 2804, 65792, 0, 2804, 65792, 0, 3408, 85, 0, 3408, 85, 0, 3408, 85, 0, 3408, 85, 0, 3412, 85, 0, 3412, 85, 0, 3412, 85, 0, 3412, 85, 0, 3424, 85, 0, 3424, 85, 0, 3424, 85, 0, 3424, 85, 0, 3428, 85, 0, 3428, 85, 0, 3428, 85, 0, 3428, 85, 0, 3440, 85, 0, 3440, 85, 0, 3440, 85, 0, 3440, 85, 0, 3444, 85, 0, 3444, 85, 0, 3444, 85, 0, 3444, 85, 0, 5200, 1028, 0, 5200, 1028, 0, 5204, 1028, 0, 5204, 1028, 0, 5216, 1028, 0, 5216, 1028, 0, 5220, 1028, 0, 5220, 1028, 0, 5232, 1028, 0, 5232, 1028, 0, 5236, 1028, 0, 5236, 1028, 0, 5776, 1431568384, 0, 5776, 1431568384, 0, 5776, 1431568384, 0, 5776, 1431568384, 0, 5776, 1431568384, 0, 5776, 1431568384, 0, 5776, 1431568384, 0, 5792, 1431568384, 0, 5792, 1431568384, 0, 5792, 1431568384, 0, 5792, 1431568384, 0, 5792, 1431568384, 0, 5792, 1431568384, 0, 5792, 1431568384, 0, 5808, 1431568384, 0, 5808, 1431568384, 0, 5808, 1431568384, 0, 5808, 1431568384, 0, 5808, 1431568384, 0, 5808, 1431568384, 0, 5808, 1431568384, 0, 6400, 8, 0, 11856, 8390656, 0, 11856, 8390656, 0, 11872, 8390656, 0, 11872, 8390656, 0, 11888, 8390656, 0, 11888, 8390656, 0, 1040, 5, 0, 1040, 5, 0, 1056, 5, 0, 1056, 5, 0, 1072, 5, 0, 1072, 5, 0, 2768, 65792, 0, 2768, 65792, 0, 2772, 65792, 0, 2772, 65792, 0, 
2784, 65792, 0, 2784, 65792, 0, 2788, 65792, 0, 2788, 65792, 0, 2800, 65792, 0, 2800, 65792, 0, 2804, 65792, 0, 2804, 65792, 0, 3408, 85, 0, 3408, 85, 0, 3408, 85, 0, 3408, 85, 0, 3412, 85, 0, 3412, 85, 0, 3412, 85, 0, 3412, 85, 0, 3424, 85, 0, 3424, 85, 0, 3424, 85, 0, 3424, 85, 0, 3428, 85, 0, 3428, 85, 0, 3428, 85, 0, 3428, 85, 0, 3440, 85, 0, 3440, 85, 0, 3440, 85, 0, 3440, 85, 0, 3444, 85, 0, 3444, 85, 0, 3444, 85, 0, 3444, 85, 0, 5200, 1028, 0, 5200, 1028, 0, 5204, 1028, 0, 5204, 1028, 0, 5216, 1028, 0, 5216, 1028, 0, 5220, 1028, 0, 5220, 1028, 0, 5232, 1028, 0, 5232, 1028, 0, 5236, 1028, 0, 5236, 1028, 0, 5776, 1431568384, 0, 5776, 1431568384, 0, 5776, 1431568384, 0, 5776, 1431568384, 0, 5776, 1431568384, 0, 5776, 1431568384, 0, 5776, 1431568384, 0, 5792, 1431568384, 0, 5792, 1431568384, 0, 5792, 1431568384, 0, 5792, 1431568384, 0, 5792, 1431568384, 0, 5792, 1431568384, 0, 5792, 1431568384, 0, 5808, 1431568384, 0, 5808, 1431568384, 0, 5808, 1431568384, 0, 5808, 1431568384, 0, 5808, 1431568384, 0, 5808, 1431568384, 0, 5808, 1431568384, 0, 6400, 8, 0, 11856, 8390656, 0, 11856, 8390656, 0, 11872, 8390656, 0, 11872, 8390656, 0, 11888, 8390656, 0, 11888, 8390656, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390462368174241_605_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390462368174241_605_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3b593229 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390462368174241_605_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,159 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 20)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
+ if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 684 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1104, 4286578689, 0, 1104, 4286578689, 0, 1104, 4286578689, 0, 1104, 4286578689, 0, 1104, 4286578689, 0, 1104, 4286578689, 0, 1104, 4286578689, 0, 1104, 4286578689, 0, 1104, 4286578689, 0, 1104, 4286578689, 0, 1120, 4286578689, 0, 1120, 4286578689, 0, 1120, 4286578689, 0, 1120, 4286578689, 0, 1120, 4286578689, 0, 1120, 4286578689, 0, 1120, 4286578689, 0, 1120, 4286578689, 0, 1120, 4286578689, 0, 1120, 4286578689, 0, 1136, 4286578689, 0, 1136, 4286578689, 0, 1136, 4286578689, 0, 1136, 4286578689, 0, 1136, 4286578689, 0, 1136, 4286578689, 0, 1136, 4286578689, 0, 1136, 4286578689, 0, 1136, 4286578689, 0, 1136, 4286578689, 0, 4112, 73, 0, 4112, 73, 0, 4112, 73, 0, 4128, 73, 0, 4128, 73, 0, 4128, 73, 0, 4144, 73, 0, 4144, 73, 0, 4144, 73, 0, 4688, 272696336, 0, 4688, 272696336, 0, 4688, 272696336, 0, 4688, 272696336, 0, 4688, 272696336, 0, 4704, 272696336, 0, 4704, 272696336, 0, 4704, 272696336, 0, 4704, 272696336, 0, 4704, 272696336, 0, 4720, 272696336, 0, 4720, 272696336, 0, 4720, 272696336, 0, 4720, 272696336, 0, 4720, 272696336, 0, 5008, 612518180, 0, 5008, 612518180, 0, 5008, 612518180, 0, 5008, 612518180, 0, 5008, 612518180, 0, 5008, 612518180, 0, 5008, 612518180, 0, 5008, 612518180, 0, 5008, 612518180, 0, 5024, 612518180, 0, 5024, 612518180, 0, 5024, 612518180, 0, 5024, 612518180, 0, 5024, 612518180, 0, 5024, 612518180, 0, 5024, 612518180, 0, 5024, 612518180, 0, 5024, 612518180, 0, 5040, 612518180, 0, 5040, 612518180, 0, 5040, 612518180, 0, 5040, 612518180, 0, 5040, 612518180, 0, 5040, 612518180, 0, 5040, 612518180, 0, 5040, 612518180, 0, 5040, 612518180, 0, 5712, 4292870144, 0, 5712, 
4292870144, 0, 5712, 4292870144, 0, 5712, 4292870144, 0, 5712, 4292870144, 0, 5712, 4292870144, 0, 5712, 4292870144, 0, 5712, 4292870144, 0, 5712, 4292870144, 0, 5712, 4292870144, 0, 5712, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 1104, 4286578689, 0, 1104, 4286578689, 0, 1104, 4286578689, 0, 1104, 4286578689, 0, 1104, 4286578689, 0, 1104, 4286578689, 0, 1104, 4286578689, 0, 1104, 4286578689, 0, 1104, 4286578689, 0, 1104, 4286578689, 0, 1120, 4286578689, 0, 1120, 4286578689, 0, 1120, 4286578689, 0, 1120, 4286578689, 0, 1120, 4286578689, 0, 1120, 4286578689, 0, 1120, 4286578689, 0, 1120, 4286578689, 0, 1120, 4286578689, 0, 1120, 4286578689, 0, 1136, 4286578689, 0, 1136, 4286578689, 0, 1136, 4286578689, 0, 1136, 4286578689, 0, 1136, 4286578689, 0, 1136, 4286578689, 0, 1136, 4286578689, 0, 1136, 4286578689, 0, 1136, 4286578689, 0, 1136, 4286578689, 0, 4112, 73, 0, 4112, 73, 0, 4112, 73, 0, 4128, 73, 0, 4128, 73, 0, 4128, 73, 0, 4144, 73, 0, 4144, 73, 0, 4144, 73, 0, 4688, 272696336, 0, 4688, 272696336, 0, 4688, 272696336, 0, 4688, 272696336, 0, 4688, 272696336, 0, 4704, 272696336, 0, 4704, 272696336, 0, 4704, 272696336, 0, 4704, 272696336, 0, 4704, 272696336, 0, 4720, 272696336, 0, 4720, 272696336, 0, 4720, 272696336, 0, 4720, 272696336, 0, 4720, 272696336, 0, 5008, 612518180, 0, 5008, 612518180, 0, 5008, 612518180, 0, 5008, 612518180, 0, 5008, 612518180, 0, 5008, 612518180, 0, 5008, 612518180, 0, 5008, 612518180, 0, 5008, 612518180, 0, 5024, 612518180, 0, 5024, 612518180, 0, 5024, 612518180, 0, 5024, 612518180, 0, 5024, 612518180, 0, 5024, 
612518180, 0, 5024, 612518180, 0, 5024, 612518180, 0, 5024, 612518180, 0, 5040, 612518180, 0, 5040, 612518180, 0, 5040, 612518180, 0, 5040, 612518180, 0, 5040, 612518180, 0, 5040, 612518180, 0, 5040, 612518180, 0, 5040, 612518180, 0, 5040, 612518180, 0, 5712, 4292870144, 0, 5712, 4292870144, 0, 5712, 4292870144, 0, 5712, 4292870144, 0, 5712, 4292870144, 0, 5712, 4292870144, 0, 5712, 4292870144, 0, 5712, 4292870144, 0, 5712, 4292870144, 0, 5712, 4292870144, 0, 5712, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5728, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0, 5744, 4292870144, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390518944815456_606_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390518944815456_606_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b85c3ea6 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390518944815456_606_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,308 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((33 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 26)) { + if 
((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 18))) { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((249 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 27)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (286 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
break; + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 300 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 4096, 0, 2116, 4096, 0, 2128, 4096, 0, 2132, 4096, 0, 3328, 64, 0, 3332, 64, 0, 3344, 64, 0, 3348, 64, 0, 5456, 268443648, 0, 5456, 268443648, 0, 5472, 268443648, 0, 5472, 268443648, 0, 7568, 2449473682, 0, 7568, 2449473682, 0, 7568, 2449473682, 0, 7568, 2449473682, 0, 7568, 2449473682, 0, 7568, 2449473682, 0, 7584, 2449473682, 0, 7584, 2449473682, 0, 7584, 2449473682, 0, 7584, 2449473682, 0, 7584, 2449473682, 0, 7584, 2449473682, 0, 8976, 545259520, 0, 8976, 545259520, 0, 8992, 545259520, 0, 8992, 545259520, 0, 11088, 1048576, 0, 11104, 1048576, 0, 12560, 256, 0, 12576, 256, 0, 13184, 73, 0, 13184, 73, 0, 13184, 73, 0, 19520, 
272696336, 0, 19520, 272696336, 0, 19520, 272696336, 0, 19520, 272696336, 0, 19520, 272696336, 0, 19840, 613566756, 0, 19840, 613566756, 0, 19840, 613566756, 0, 19840, 613566756, 0, 19840, 613566756, 0, 19840, 613566756, 0, 19840, 613566756, 0, 19840, 613566756, 0, 19840, 613566756, 0, 19840, 613566756, 0, 2112, 4096, 0, 2116, 4096, 0, 2128, 4096, 0, 2132, 4096, 0, 3328, 64, 0, 3332, 64, 0, 3344, 64, 0, 3348, 64, 0, 5456, 268443648, 0, 5456, 268443648, 0, 5472, 268443648, 0, 5472, 268443648, 0, 7568, 2449473682, 0, 7568, 2449473682, 0, 7568, 2449473682, 0, 7568, 2449473682, 0, 7568, 2449473682, 0, 7568, 2449473682, 0, 7584, 2449473682, 0, 7584, 2449473682, 0, 7584, 2449473682, 0, 7584, 2449473682, 0, 7584, 2449473682, 0, 7584, 2449473682, 0, 8976, 545259520, 0, 8976, 545259520, 0, 8992, 545259520, 0, 8992, 545259520, 0, 11088, 1048576, 0, 11104, 1048576, 0, 12560, 256, 0, 12576, 256, 0, 13184, 73, 0, 13184, 73, 0, 13184, 73, 0, 19520, 272696336, 0, 19520, 272696336, 0, 19520, 272696336, 0, 19520, 272696336, 0, 19520, 272696336, 0, 19840, 613566756, 0, 19840, 613566756, 0, 19840, 613566756, 0, 19840, 613566756, 0, 19840, 613566756, 0, 19840, 613566756, 0, 19840, 613566756, 0, 19840, 613566756, 0, 19840, 613566756, 0, 19840, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390586667850735_607_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390586667850735_607_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0a4ee2cd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390586667850735_607_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,192 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 252 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2048, 128, 0, 1664, 1049601, 0, 1664, 1049601, 0, 1664, 1049601, 0, 1280, 382, 0, 1280, 382, 0, 1280, 382, 0, 1280, 382, 0, 1280, 382, 0, 1280, 382, 0, 1280, 382, 0, 4288, 2147483648, 0, 3904, 4195328, 0, 3904, 4195328, 0, 3520, 7, 0, 3520, 7, 0, 3520, 7, 0, 6272, 65, 0, 6272, 65, 0, 6288, 65, 0, 6288, 65, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 
613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 2048, 128, 0, 1664, 1049601, 0, 1664, 1049601, 0, 1664, 1049601, 0, 1280, 382, 0, 1280, 382, 0, 1280, 382, 0, 1280, 382, 0, 1280, 382, 0, 1280, 382, 0, 1280, 382, 0, 4288, 2147483648, 0, 3904, 4195328, 0, 3904, 4195328, 0, 3520, 7, 0, 3520, 7, 0, 3520, 7, 0, 6272, 65, 0, 6272, 65, 0, 6288, 65, 0, 6288, 65, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 7680, 1363481681, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0, 8000, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390588125800333_608_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390588125800333_608_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..60b439bc --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390588125800333_608_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,401 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 16)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() < 10)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 14))) { + if 
(((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 25))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 29)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + 
result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((244 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if 
((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((268 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((275 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 28)) { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (307 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (326 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (330 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (337 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 570 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 2818572970, 0, 1792, 2818572970, 0, 1792, 2818572970, 0, 1792, 2818572970, 0, 1792, 2818572970, 0, 1792, 2818572970, 0, 1792, 2818572970, 0, 1792, 2818572970, 0, 1808, 2818572970, 0, 1808, 2818572970, 0, 1808, 2818572970, 0, 1808, 2818572970, 0, 1808, 
2818572970, 0, 1808, 2818572970, 0, 1808, 2818572970, 0, 1808, 2818572970, 0, 1824, 2818572970, 0, 1824, 2818572970, 0, 1824, 2818572970, 0, 1824, 2818572970, 0, 1824, 2818572970, 0, 1824, 2818572970, 0, 1824, 2818572970, 0, 1824, 2818572970, 0, 4992, 2621440, 0, 4992, 2621440, 0, 5008, 2621440, 0, 5008, 2621440, 0, 5024, 2621440, 0, 5024, 2621440, 0, 5696, 2860515338, 0, 5696, 2860515338, 0, 5696, 2860515338, 0, 5696, 2860515338, 0, 5696, 2860515338, 0, 5696, 2860515338, 0, 5696, 2860515338, 0, 5712, 2860515338, 0, 5712, 2860515338, 0, 5712, 2860515338, 0, 5712, 2860515338, 0, 5712, 2860515338, 0, 5712, 2860515338, 0, 5712, 2860515338, 0, 5728, 2860515338, 0, 5728, 2860515338, 0, 5728, 2860515338, 0, 5728, 2860515338, 0, 5728, 2860515338, 0, 5728, 2860515338, 0, 5728, 2860515338, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 9152, 2147483648, 0, 9856, 2147483648, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 19008, 272696336, 0, 19008, 272696336, 0, 19008, 272696336, 0, 19008, 272696336, 0, 19008, 272696336, 0, 21568, 536870912, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 2818572970, 0, 1792, 2818572970, 0, 1792, 2818572970, 0, 1792, 2818572970, 0, 1792, 2818572970, 0, 1792, 2818572970, 0, 1792, 2818572970, 0, 1792, 2818572970, 0, 1808, 2818572970, 0, 1808, 2818572970, 0, 1808, 2818572970, 0, 1808, 2818572970, 0, 1808, 2818572970, 0, 1808, 2818572970, 0, 1808, 
2818572970, 0, 1808, 2818572970, 0, 1824, 2818572970, 0, 1824, 2818572970, 0, 1824, 2818572970, 0, 1824, 2818572970, 0, 1824, 2818572970, 0, 1824, 2818572970, 0, 1824, 2818572970, 0, 1824, 2818572970, 0, 4992, 2621440, 0, 4992, 2621440, 0, 5008, 2621440, 0, 5008, 2621440, 0, 5024, 2621440, 0, 5024, 2621440, 0, 5696, 2860515338, 0, 5696, 2860515338, 0, 5696, 2860515338, 0, 5696, 2860515338, 0, 5696, 2860515338, 0, 5696, 2860515338, 0, 5696, 2860515338, 0, 5712, 2860515338, 0, 5712, 2860515338, 0, 5712, 2860515338, 0, 5712, 2860515338, 0, 5712, 2860515338, 0, 5712, 2860515338, 0, 5712, 2860515338, 0, 5728, 2860515338, 0, 5728, 2860515338, 0, 5728, 2860515338, 0, 5728, 2860515338, 0, 5728, 2860515338, 0, 5728, 2860515338, 0, 5728, 2860515338, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 6784, 2863311530, 0, 9152, 2147483648, 0, 9856, 2147483648, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 11520, 2863311530, 0, 19008, 272696336, 0, 19008, 272696336, 0, 19008, 272696336, 0, 19008, 272696336, 0, 19008, 272696336, 0, 21568, 536870912, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: 
_wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390603327870232_609_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390603327870232_609_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2e14b2e2 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390603327870232_609_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,99 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2064, 83886096, 0, 2064, 83886096, 0, 2064, 83886096, 0, 2080, 83886096, 0, 2080, 83886096, 0, 2080, 83886096, 0, 3536, 17039360, 0, 3536, 17039360, 0, 3552, 17039360, 0, 3552, 17039360, 0, 2064, 83886096, 0, 2064, 83886096, 0, 2064, 83886096, 0, 2080, 83886096, 0, 2080, 83886096, 0, 2080, 83886096, 0, 3536, 17039360, 0, 3536, 17039360, 0, 3552, 17039360, 0, 3552, 17039360, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390603877229303_610_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390603877229303_610_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0bb79919 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390603877229303_610_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,149 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1216, 1, 0, 2256, 268501008, 0, 2256, 268501008, 0, 2256, 268501008, 0, 2272, 268501008, 0, 2272, 268501008, 0, 2272, 268501008, 0, 2288, 268501008, 0, 2288, 268501008, 0, 2288, 268501008, 0, 2560, 538050848, 0, 2560, 538050848, 0, 2560, 538050848, 0, 2560, 538050848, 0, 2560, 538050848, 0, 2880, 1145324612, 0, 2880, 1145324612, 0, 2880, 1145324612, 0, 2880, 1145324612, 0, 2880, 1145324612, 0, 2880, 1145324612, 0, 2880, 1145324612, 0, 2880, 1145324612, 0, 3328, 559240, 
0, 3328, 559240, 0, 3328, 559240, 0, 3328, 559240, 0, 3328, 559240, 0, 576, 17, 0, 576, 17, 0, 1216, 1, 0, 2256, 268501008, 0, 2256, 268501008, 0, 2256, 268501008, 0, 2272, 268501008, 0, 2272, 268501008, 0, 2272, 268501008, 0, 2288, 268501008, 0, 2288, 268501008, 0, 2288, 268501008, 0, 2560, 538050848, 0, 2560, 538050848, 0, 2560, 538050848, 0, 2560, 538050848, 0, 2560, 538050848, 0, 2880, 1145324612, 0, 2880, 1145324612, 0, 2880, 1145324612, 0, 2880, 1145324612, 0, 2880, 1145324612, 0, 2880, 1145324612, 0, 2880, 1145324612, 0, 2880, 1145324612, 0, 3328, 559240, 0, 3328, 559240, 0, 3328, 559240, 0, 3328, 559240, 0, 3328, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390604891794373_611_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390604891794373_611_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6cc8e9e1 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390604891794373_611_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,156 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 7)) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((32 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 0)) { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((110 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2064, 2, 0, 2068, 2, 0, 2080, 2, 0, 2084, 2, 0, 3216, 3, 0, 3216, 3, 0, 3220, 3, 0, 3220, 3, 0, 3232, 3, 0, 3232, 3, 0, 3236, 3, 0, 3236, 3, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 2064, 2, 0, 2068, 2, 0, 2080, 2, 0, 2084, 2, 0, 3216, 3, 0, 3216, 3, 0, 3220, 3, 0, 3220, 3, 0, 3232, 3, 0, 3232, 3, 0, 3236, 3, 0, 3236, 3, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8272, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0, 8288, 1431655680, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390607268753215_612_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390607268753215_612_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f1d3cc3d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390607268753215_612_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,217 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24))) { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 30))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 16)) { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 288 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5376, 21845, 0, 5376, 21845, 0, 5376, 21845, 0, 5376, 21845, 0, 5376, 21845, 0, 5376, 21845, 0, 5376, 21845, 0, 5376, 21845, 0, 6528, 85, 0, 6528, 85, 0, 6528, 85, 0, 6528, 85, 0, 6544, 85, 0, 6544, 85, 0, 6544, 85, 0, 6544, 85, 0, 6560, 85, 0, 6560, 85, 0, 6560, 85, 0, 6560, 85, 0, 6976, 21, 0, 6976, 21, 0, 6976, 21, 0, 6992, 21, 0, 6992, 21, 0, 6992, 21, 0, 7008, 21, 0, 7008, 21, 0, 7008, 21, 0, 8256, 1, 0, 10048, 1426063360, 0, 10048, 1426063360, 0, 10048, 1426063360, 0, 10048, 1426063360, 0, 10064, 1426063360, 0, 10064, 1426063360, 0, 10064, 1426063360, 0, 10064, 1426063360, 0, 11648, 1430257664, 0, 11648, 1430257664, 0, 11648, 1430257664, 0, 11648, 1430257664, 0, 11648, 1430257664, 0, 11664, 1430257664, 0, 11664, 1430257664, 0, 11664, 1430257664, 0, 11664, 1430257664, 0, 11664, 1430257664, 0, 5376, 21845, 0, 5376, 21845, 0, 5376, 21845, 0, 5376, 21845, 0, 5376, 21845, 0, 5376, 21845, 0, 5376, 21845, 0, 5376, 21845, 0, 6528, 85, 0, 6528, 85, 0, 6528, 85, 0, 6528, 85, 0, 6544, 85, 0, 6544, 85, 0, 6544, 85, 0, 6544, 85, 0, 6560, 85, 0, 6560, 85, 0, 6560, 85, 0, 6560, 85, 0, 6976, 21, 0, 6976, 21, 0, 6976, 21, 0, 6992, 21, 0, 6992, 21, 0, 6992, 21, 0, 7008, 21, 0, 7008, 21, 0, 7008, 21, 0, 8256, 1, 0, 10048, 1426063360, 0, 10048, 
1426063360, 0, 10048, 1426063360, 0, 10048, 1426063360, 0, 10064, 1426063360, 0, 10064, 1426063360, 0, 10064, 1426063360, 0, 10064, 1426063360, 0, 11648, 1430257664, 0, 11648, 1430257664, 0, 11648, 1430257664, 0, 11648, 1430257664, 0, 11648, 1430257664, 0, 11664, 1430257664, 0, 11664, 1430257664, 0, 11664, 1430257664, 0, 11664, 1430257664, 0, 11664, 1430257664, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390610063960575_613_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390610063960575_613_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c081ff9e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390610063960575_613_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,233 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 29))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2432, 1140850689, 0, 2432, 1140850689, 0, 2432, 1140850689, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 1664, 2862612650, 0, 1664, 2862612650, 0, 1664, 2862612650, 0, 1664, 2862612650, 0, 1664, 2862612650, 0, 1664, 2862612650, 0, 1664, 2862612650, 0, 1664, 2862612650, 0, 1664, 2862612650, 0, 1664, 2862612650, 0, 10560, 1145324612, 0, 10560, 
1145324612, 0, 10560, 1145324612, 0, 10560, 1145324612, 0, 10560, 1145324612, 0, 10560, 1145324612, 0, 10560, 1145324612, 0, 10560, 1145324612, 0, 11200, 8, 0, 12096, 8390656, 0, 12096, 8390656, 0, 2432, 1140850689, 0, 2432, 1140850689, 0, 2432, 1140850689, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 2048, 290805076, 0, 1664, 2862612650, 0, 1664, 2862612650, 0, 1664, 2862612650, 0, 1664, 2862612650, 0, 1664, 2862612650, 0, 1664, 2862612650, 0, 1664, 2862612650, 0, 1664, 2862612650, 0, 1664, 2862612650, 0, 1664, 2862612650, 0, 10560, 1145324612, 0, 10560, 1145324612, 0, 10560, 1145324612, 0, 10560, 1145324612, 0, 10560, 1145324612, 0, 10560, 1145324612, 0, 10560, 1145324612, 0, 10560, 1145324612, 0, 11200, 8, 0, 12096, 8390656, 0, 12096, 8390656, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390611255972650_614_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390611255972650_614_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..eec0d8ff --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390611255972650_614_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,182 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; 
+ } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((89 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((115 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 11))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 666 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1536, 272696336, 0, 1536, 272696336, 0, 1536, 272696336, 0, 1536, 272696336, 0, 1536, 272696336, 0, 4368, 1073741845, 0, 4368, 1073741845, 0, 4368, 1073741845, 0, 4368, 1073741845, 0, 4384, 1073741845, 0, 4384, 1073741845, 0, 4384, 1073741845, 0, 4384, 1073741845, 0, 4400, 1073741845, 0, 4400, 1073741845, 0, 4400, 1073741845, 0, 4400, 1073741845, 0, 5712, 1426063361, 0, 5712, 1426063361, 0, 5712, 1426063361, 0, 5712, 1426063361, 0, 5712, 1426063361, 0, 5716, 1426063361, 0, 5716, 1426063361, 0, 5716, 1426063361, 0, 5716, 1426063361, 0, 5716, 1426063361, 0, 5720, 1426063361, 0, 5720, 1426063361, 0, 5720, 1426063361, 0, 5720, 1426063361, 0, 5720, 1426063361, 0, 5728, 1426063361, 0, 5728, 1426063361, 0, 5728, 1426063361, 0, 5728, 1426063361, 0, 5728, 1426063361, 0, 5732, 1426063361, 0, 5732, 1426063361, 0, 5732, 1426063361, 0, 5732, 1426063361, 0, 5732, 1426063361, 0, 5736, 1426063361, 0, 5736, 1426063361, 0, 5736, 1426063361, 0, 5736, 1426063361, 0, 5736, 1426063361, 0, 5744, 1426063361, 0, 5744, 1426063361, 0, 5744, 1426063361, 0, 5744, 1426063361, 0, 5744, 1426063361, 0, 5748, 1426063361, 0, 5748, 1426063361, 0, 5748, 1426063361, 0, 5748, 1426063361, 0, 5748, 1426063361, 0, 5752, 1426063361, 0, 5752, 1426063361, 0, 5752, 1426063361, 0, 5752, 1426063361, 0, 5752, 1426063361, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 
2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 8336, 2048, 0, 1536, 272696336, 0, 1536, 272696336, 0, 1536, 272696336, 0, 1536, 272696336, 0, 1536, 272696336, 0, 4368, 1073741845, 0, 4368, 1073741845, 0, 4368, 1073741845, 0, 4368, 1073741845, 0, 4384, 1073741845, 0, 4384, 1073741845, 0, 4384, 1073741845, 0, 4384, 1073741845, 0, 4400, 1073741845, 0, 4400, 1073741845, 0, 4400, 1073741845, 0, 4400, 1073741845, 0, 5712, 1426063361, 0, 5712, 1426063361, 0, 5712, 1426063361, 0, 5712, 1426063361, 0, 5712, 1426063361, 0, 5716, 1426063361, 0, 5716, 1426063361, 0, 5716, 1426063361, 0, 5716, 1426063361, 0, 5716, 1426063361, 0, 5720, 1426063361, 0, 5720, 1426063361, 0, 5720, 1426063361, 0, 5720, 1426063361, 0, 5720, 1426063361, 0, 5728, 1426063361, 0, 5728, 1426063361, 0, 5728, 1426063361, 0, 5728, 1426063361, 0, 5728, 1426063361, 0, 5732, 1426063361, 0, 5732, 1426063361, 0, 5732, 1426063361, 0, 5732, 1426063361, 0, 5732, 1426063361, 0, 5736, 1426063361, 0, 5736, 1426063361, 0, 5736, 1426063361, 0, 5736, 1426063361, 0, 5736, 1426063361, 0, 5744, 1426063361, 0, 5744, 1426063361, 0, 5744, 1426063361, 0, 
5744, 1426063361, 0, 5744, 1426063361, 0, 5748, 1426063361, 0, 5748, 1426063361, 0, 5748, 1426063361, 0, 5748, 1426063361, 0, 5748, 1426063361, 0, 5752, 1426063361, 0, 5752, 1426063361, 0, 5752, 1426063361, 0, 5752, 1426063361, 0, 5752, 1426063361, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7380, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7384, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 7388, 2863311530, 0, 8336, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390640432484081_615_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390640432484081_615_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..576bda33 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390640432484081_615_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* appended in 3-uint records: tag word, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free slot in _participant_bit; every writer advances it by 3 with InterlockedAdd */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* even lanes below index 8 record one wave op; the case 1 branch tests for an even index on odd lanes, so it never records */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390640600847546_616_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390640600847546_616_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e4d0fe48 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390640600847546_616_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,322 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* appended in 3-uint records: tag word, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free slot in _participant_bit; every writer advances it by 3 with InterlockedAdd */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 29)) { + if ((WaveGetLaneIndex() == 17)) { + result 
= (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 5)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((182 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((269 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 18))) { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((292 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((306 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((315 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 300 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3200, 1409286229, 0, 3200, 1409286229, 0, 3200, 1409286229, 0, 3200, 1409286229, 0, 3200, 1409286229, 0, 3200, 1409286229, 0, 3200, 1409286229, 0, 2176, 256, 0, 4096, 17, 0, 4096, 17, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 10176, 2147483648, 0, 10192, 2147483648, 0, 10208, 2147483648, 0, 14224, 16384, 0, 14240, 16384, 0, 14256, 16384, 0, 17232, 67108928, 0, 17232, 67108928, 0, 17248, 67108928, 0, 17248, 67108928, 0, 17264, 67108928, 0, 17264, 67108928, 0, 19600, 32768, 0, 19616, 32768, 0, 20176, 32768, 0, 20192, 32768, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3456, 2863311530, 0, 3200, 1409286229, 0, 3200, 1409286229, 0, 3200, 1409286229, 0, 3200, 1409286229, 0, 3200, 1409286229, 0, 3200, 1409286229, 0, 3200, 1409286229, 0, 2176, 256, 0, 4096, 17, 0, 4096, 17, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 
0, 4992, 1145324612, 0, 10176, 2147483648, 0, 10192, 2147483648, 0, 10208, 2147483648, 0, 14224, 16384, 0, 14240, 16384, 0, 14256, 16384, 0, 17232, 67108928, 0, 17232, 67108928, 0, 17248, 67108928, 0, 17248, 67108928, 0, 17264, 67108928, 0, 17264, 67108928, 0, 19600, 32768, 0, 19616, 32768, 0, 20176, 32768, 0, 20192, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390657412664294_617_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390657412664294_617_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..edb863d3 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390657412664294_617_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,410 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* appended in 3-uint records: tag word, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free slot in _participant_bit; every writer advances it by 3 with InterlockedAdd */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint 
counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 22)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + 
switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 17)) { + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25))) { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((268 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 27))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((297 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 3) || 
(WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((312 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (333 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 4))) { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((380 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (393 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (398 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (402 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2000, 73728, 0, 2000, 73728, 0, 2016, 73728, 0, 2016, 73728, 0, 4688, 268443648, 0, 4688, 268443648, 0, 4704, 268443648, 0, 4704, 268443648, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 9600, 1, 0, 12544, 1, 0, 14848, 67108864, 0, 14864, 67108864, 0, 20416, 559240, 0, 20416, 559240, 0, 20416, 559240, 0, 20416, 559240, 0, 20416, 559240, 0, 21312, 73, 0, 21312, 73, 0, 21312, 73, 0, 25472, 613566756, 0, 25472, 613566756, 0, 25472, 613566756, 0, 25472, 613566756, 0, 25472, 
613566756, 0, 25472, 613566756, 0, 25472, 613566756, 0, 25472, 613566756, 0, 25472, 613566756, 0, 25472, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 2000, 73728, 0, 2000, 73728, 0, 2016, 73728, 0, 2016, 73728, 0, 4688, 268443648, 0, 4688, 268443648, 0, 4704, 268443648, 0, 4704, 268443648, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 9600, 1, 0, 12544, 1, 0, 14848, 67108864, 0, 14864, 67108864, 0, 20416, 559240, 0, 20416, 559240, 0, 20416, 559240, 0, 20416, 559240, 0, 20416, 559240, 0, 21312, 73, 0, 21312, 73, 0, 21312, 73, 0, 25472, 613566756, 0, 25472, 613566756, 0, 25472, 613566756, 0, 25472, 613566756, 0, 25472, 613566756, 0, 25472, 613566756, 0, 25472, 613566756, 0, 25472, 613566756, 0, 25472, 613566756, 0, 25472, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390669258795034_618_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390669258795034_618_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..11757b85 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390669258795034_618_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,234 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + 
if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((90 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((101 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((112 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 27))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || 
(WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 2880, 131072, 0, 2896, 131072, 0, 4608, 32, 0, 4612, 32, 0, 4624, 32, 0, 4628, 32, 0, 5760, 603979780, 0, 5760, 603979780, 0, 5760, 603979780, 0, 5764, 603979780, 0, 5764, 603979780, 0, 5764, 603979780, 0, 5776, 603979780, 0, 5776, 603979780, 0, 5776, 603979780, 0, 5780, 603979780, 0, 5780, 603979780, 0, 5780, 603979780, 0, 8192, 17, 0, 8192, 17, 0, 9088, 1145324612, 0, 9088, 1145324612, 0, 9088, 1145324612, 0, 9088, 1145324612, 0, 9088, 1145324612, 0, 9088, 1145324612, 0, 9088, 1145324612, 0, 9088, 1145324612, 0, 9728, 8, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 2880, 131072, 0, 2896, 131072, 0, 4608, 32, 0, 4612, 32, 0, 4624, 32, 0, 4628, 32, 0, 5760, 603979780, 0, 5760, 603979780, 0, 5760, 603979780, 0, 5764, 603979780, 0, 5764, 603979780, 0, 5764, 603979780, 0, 5776, 603979780, 0, 5776, 603979780, 0, 5776, 603979780, 0, 5780, 603979780, 0, 5780, 603979780, 0, 5780, 603979780, 0, 8192, 17, 0, 8192, 17, 0, 9088, 1145324612, 0, 9088, 1145324612, 0, 9088, 1145324612, 0, 9088, 1145324612, 0, 9088, 1145324612, 0, 9088, 1145324612, 0, 9088, 1145324612, 0, 9088, 1145324612, 0, 9728, 8, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390681536837259_619_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390681536837259_619_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ba426ac6 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390681536837259_619_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,220 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 12)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 426 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 4608, 32, 0, 6656, 559240, 0, 6656, 559240, 0, 6656, 559240, 0, 6656, 559240, 0, 6656, 559240, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 8448, 1, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 
1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 832, 2863311530, 0, 4608, 32, 0, 6656, 559240, 0, 6656, 559240, 0, 6656, 559240, 0, 6656, 559240, 0, 6656, 559240, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7232, 2863311530, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 7808, 1431655765, 0, 8448, 1, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0, 9472, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 
+ - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390682406268859_620_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390682406268859_620_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1b74ebde --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390682406268859_620_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,259 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((73 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((82 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 19)) || 
(WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 23))) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((176 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((227 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + 
+#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 324 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3652, 4096, 0, 3668, 4096, 0, 6336, 1145324612, 0, 6336, 1145324612, 0, 6336, 1145324612, 0, 6336, 1145324612, 0, 6336, 1145324612, 0, 6336, 1145324612, 0, 6336, 1145324612, 0, 6336, 1145324612, 0, 7504, 128, 0, 7520, 128, 0, 12688, 524288, 0, 12704, 524288, 0, 13312, 85, 0, 13312, 85, 0, 13312, 85, 0, 13312, 85, 0, 14528, 33556480, 0, 14528, 33556480, 0, 14544, 33556480, 0, 14544, 33556480, 0, 14560, 33556480, 0, 14560, 33556480, 0, 16448, 4160749572, 0, 16448, 4160749572, 0, 16448, 4160749572, 0, 16448, 4160749572, 0, 16448, 4160749572, 0, 16448, 4160749572, 0, 16464, 4160749572, 0, 16464, 4160749572, 0, 16464, 4160749572, 0, 16464, 4160749572, 0, 16464, 4160749572, 0, 16464, 4160749572, 0, 16480, 4160749572, 0, 16480, 4160749572, 0, 16480, 4160749572, 0, 16480, 4160749572, 0, 16480, 4160749572, 0, 16480, 4160749572, 0, 17664, 1074004996, 0, 17664, 1074004996, 0, 17664, 1074004996, 0, 17664, 1074004996, 0, 17680, 1074004996, 0, 17680, 1074004996, 0, 17680, 1074004996, 0, 17680, 1074004996, 0, 17696, 1074004996, 0, 17696, 1074004996, 0, 17696, 1074004996, 0, 17696, 1074004996, 0, 3652, 4096, 0, 3668, 4096, 0, 6336, 1145324612, 0, 6336, 1145324612, 0, 6336, 1145324612, 0, 6336, 1145324612, 0, 6336, 1145324612, 0, 6336, 1145324612, 0, 6336, 1145324612, 0, 6336, 1145324612, 0, 7504, 128, 0, 7520, 128, 0, 12688, 524288, 0, 12704, 524288, 0, 13312, 85, 0, 13312, 85, 0, 13312, 85, 0, 13312, 85, 0, 14528, 33556480, 0, 14528, 33556480, 0, 14544, 33556480, 0, 14544, 33556480, 0, 14560, 33556480, 0, 14560, 33556480, 0, 16448, 4160749572, 0, 16448, 4160749572, 0, 16448, 4160749572, 0, 16448, 4160749572, 0, 16448, 4160749572, 0, 16448, 4160749572, 0, 16464, 4160749572, 0, 16464, 4160749572, 0, 
16464, 4160749572, 0, 16464, 4160749572, 0, 16464, 4160749572, 0, 16464, 4160749572, 0, 16480, 4160749572, 0, 16480, 4160749572, 0, 16480, 4160749572, 0, 16480, 4160749572, 0, 16480, 4160749572, 0, 16480, 4160749572, 0, 17664, 1074004996, 0, 17664, 1074004996, 0, 17664, 1074004996, 0, 17664, 1074004996, 0, 17680, 1074004996, 0, 17680, 1074004996, 0, 17680, 1074004996, 0, 17680, 1074004996, 0, 17696, 1074004996, 0, 17696, 1074004996, 0, 17696, 1074004996, 0, 17696, 1074004996, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390755854738240_621_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390755854738240_621_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2743c043 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390755854738240_621_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,111 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 29)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 2))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((80 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((89 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390784887530895_623_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390784887530895_623_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..afae418c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390784887530895_623_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,422 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 4) || 
(WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 29))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 16)) { + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + 
result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() >= 16)) { + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 
13)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((176 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() >= 24)) { + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 4))) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((280 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (295 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 26))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (327 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (332 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (348 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((364 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (371 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (381 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (390 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (394 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (401 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 5696, 35791394, 0, 5696, 35791394, 0, 5696, 35791394, 0, 5696, 35791394, 0, 5696, 35791394, 0, 5696, 35791394, 0, 5696, 35791394, 0, 12096, 2281701376, 0, 12096, 2281701376, 0, 13136, 2281701376, 0, 13136, 2281701376, 0, 13152, 2281701376, 0, 13152, 2281701376, 0, 15360, 17, 0, 15360, 17, 0, 19328, 570425344, 0, 19328, 570425344, 0, 20480, 536870914, 0, 20480, 536870914, 0, 20928, 546, 0, 20928, 546, 0, 20928, 546, 0, 21248, 1145324612, 0, 21248, 1145324612, 0, 21248, 1145324612, 0, 21248, 1145324612, 0, 21248, 1145324612, 0, 21248, 
1145324612, 0, 21248, 1145324612, 0, 21248, 1145324612, 0, 576, 17, 0, 576, 17, 0, 5696, 35791394, 0, 5696, 35791394, 0, 5696, 35791394, 0, 5696, 35791394, 0, 5696, 35791394, 0, 5696, 35791394, 0, 5696, 35791394, 0, 12096, 2281701376, 0, 12096, 2281701376, 0, 13136, 2281701376, 0, 13136, 2281701376, 0, 13152, 2281701376, 0, 13152, 2281701376, 0, 15360, 17, 0, 15360, 17, 0, 19328, 570425344, 0, 19328, 570425344, 0, 20480, 536870914, 0, 20480, 536870914, 0, 20928, 546, 0, 20928, 546, 0, 20928, 546, 0, 21248, 1145324612, 0, 21248, 1145324612, 0, 21248, 1145324612, 0, 21248, 1145324612, 0, 21248, 1145324612, 0, 21248, 1145324612, 0, 21248, 1145324612, 0, 21248, 1145324612, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390910283058448_625_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390910283058448_625_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..df44f94e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390910283058448_625_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,402 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 19))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + } + } else { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 26))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + case 3: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 19))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20))) { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (331 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 31)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (343 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (352 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 22))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (370 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((384 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 456 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4480, 4096, 0, 6160, 65536, 0, 6176, 65536, 0, 7552, 16, 0, 9280, 1140850688, 0, 9280, 1140850688, 0, 10192, 1140850688, 0, 10192, 1140850688, 0, 10208, 1140850688, 0, 10208, 1140850688, 0, 10224, 1140850688, 0, 10224, 1140850688, 0, 10640, 1140850688, 0, 10640, 1140850688, 0, 10656, 1140850688, 0, 10656, 1140850688, 0, 10672, 1140850688, 0, 10672, 1140850688, 0, 11328, 4, 0, 12544, 64, 0, 16384, 524288, 0, 16400, 524288, 0, 16416, 524288, 0, 17216, 72, 0, 17216, 72, 0, 17792, 1078199360, 0, 17792, 1078199360, 0, 17792, 1078199360, 0, 17792, 1078199360, 0, 17792, 1078199360, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 19776, 16384, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 24592, 16777216, 0, 24608, 16777216, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 4480, 4096, 0, 6160, 65536, 0, 6176, 65536, 0, 7552, 16, 0, 9280, 1140850688, 0, 9280, 1140850688, 0, 10192, 1140850688, 0, 10192, 1140850688, 0, 10208, 1140850688, 0, 10208, 1140850688, 0, 10224, 1140850688, 0, 10224, 1140850688, 0, 10640, 1140850688, 0, 10640, 1140850688, 0, 10656, 1140850688, 0, 10656, 1140850688, 0, 10672, 1140850688, 0, 10672, 
1140850688, 0, 11328, 4, 0, 12544, 64, 0, 16384, 524288, 0, 16400, 524288, 0, 16416, 524288, 0, 17216, 72, 0, 17216, 72, 0, 17792, 1078199360, 0, 17792, 1078199360, 0, 17792, 1078199360, 0, 17792, 1078199360, 0, 17792, 1078199360, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 18112, 3435449544, 0, 19776, 16384, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 23680, 4286578815, 0, 24592, 16777216, 0, 24608, 16777216, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0, 25280, 4261412927, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390930219677570_626_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390930219677570_626_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b56225fc --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390930219677570_626_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,267 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 17))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26))) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 288 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 3200, 2449473554, 0, 3200, 2449473554, 0, 3200, 2449473554, 0, 3200, 2449473554, 0, 3200, 2449473554, 0, 3776, 4260864, 0, 3776, 4260864, 0, 3776, 4260864, 0, 4352, 532608, 0, 4352, 532608, 0, 4352, 532608, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 
4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 5632, 85, 0, 5632, 85, 0, 5632, 85, 0, 5632, 85, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 7104, 85, 0, 7104, 85, 0, 7104, 85, 0, 7104, 85, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 3200, 2449473554, 0, 3200, 2449473554, 0, 3200, 2449473554, 0, 3200, 2449473554, 0, 3200, 2449473554, 0, 3776, 4260864, 0, 3776, 4260864, 0, 3776, 4260864, 0, 4352, 532608, 0, 4352, 532608, 0, 4352, 532608, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 5632, 85, 0, 5632, 85, 0, 5632, 85, 0, 5632, 85, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 6208, 1431655765, 0, 7104, 85, 0, 7104, 85, 0, 7104, 85, 0, 7104, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390934619285894_627_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390934619285894_627_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..825bd507 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390934619285894_627_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,209 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 25)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() 
== 15)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 2))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3264, 9, 0, 3264, 9, 0, 4032, 1, 0, 4608, 272696336, 0, 4608, 272696336, 0, 4608, 272696336, 0, 4608, 272696336, 0, 4608, 272696336, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 3264, 9, 0, 3264, 9, 0, 4032, 1, 0, 4608, 272696336, 0, 4608, 272696336, 0, 4608, 272696336, 0, 4608, 272696336, 0, 4608, 272696336, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 
613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0, 8128, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390935278486331_628_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390935278486331_628_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4f5ffad7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390935278486331_628_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,239 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 10)) { + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4352, 512, 0, 7232, 1145324612, 0, 7232, 1145324612, 0, 7232, 1145324612, 0, 7232, 1145324612, 0, 7232, 1145324612, 0, 7232, 1145324612, 0, 7232, 1145324612, 0, 7232, 1145324612, 0, 7872, 8, 0, 9344, 2148007936, 0, 9344, 2148007936, 0, 9348, 2148007936, 0, 9348, 2148007936, 0, 9352, 2148007936, 0, 9352, 2148007936, 0, 9360, 2148007936, 0, 9360, 2148007936, 0, 9364, 2148007936, 0, 9364, 2148007936, 0, 9368, 2148007936, 0, 9368, 2148007936, 0, 9376, 2148007936, 0, 9376, 2148007936, 0, 9380, 2148007936, 0, 9380, 2148007936, 0, 9384, 2148007936, 0, 9384, 2148007936, 0, 10432, 8390656, 0, 10432, 8390656, 0, 576, 17, 0, 576, 17, 0, 4352, 512, 0, 7232, 1145324612, 0, 7232, 1145324612, 0, 7232, 1145324612, 0, 7232, 1145324612, 0, 7232, 1145324612, 0, 7232, 1145324612, 0, 7232, 1145324612, 0, 7232, 1145324612, 0, 7872, 8, 0, 9344, 2148007936, 0, 9344, 2148007936, 0, 9348, 2148007936, 0, 9348, 2148007936, 0, 9352, 2148007936, 0, 9352, 2148007936, 0, 9360, 2148007936, 0, 9360, 2148007936, 0, 
9364, 2148007936, 0, 9364, 2148007936, 0, 9368, 2148007936, 0, 9368, 2148007936, 0, 9376, 2148007936, 0, 9376, 2148007936, 0, 9380, 2148007936, 0, 9380, 2148007936, 0, 9384, 2148007936, 0, 9384, 2148007936, 0, 10432, 8390656, 0, 10432, 8390656, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390937310506032_629_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390937310506032_629_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..245a0bff --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390937310506032_629_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,112 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20)) || 
(WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2560, 17842176, 0, 2560, 17842176, 
0, 2560, 17842176, 0, 3584, 85, 0, 3584, 85, 0, 3584, 85, 0, 3584, 85, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2944, 4261412927, 0, 2560, 17842176, 0, 2560, 17842176, 0, 2560, 17842176, 0, 3584, 85, 0, 3584, 85, 0, 3584, 85, 0, 3584, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390948045256514_631_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390948045256514_631_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..82e79c82 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390948045256514_631_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,79 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 136331265, 0, 1792, 136331265, 0, 1792, 136331265, 0, 1792, 136331265, 0, 1808, 136331265, 0, 1808, 136331265, 0, 1808, 136331265, 0, 1808, 136331265, 0, 2752, 3758096384, 0, 2752, 3758096384, 0, 2752, 3758096384, 0, 2756, 3758096384, 0, 2756, 3758096384, 0, 2756, 3758096384, 0, 2760, 3758096384, 0, 2760, 3758096384, 0, 2760, 3758096384, 0, 2768, 3758096384, 0, 2768, 3758096384, 0, 2768, 3758096384, 0, 2772, 3758096384, 0, 2772, 3758096384, 0, 2772, 3758096384, 0, 2776, 3758096384, 0, 2776, 3758096384, 0, 2776, 3758096384, 0, 1792, 136331265, 0, 1792, 136331265, 0, 1792, 136331265, 0, 1792, 136331265, 0, 1808, 136331265, 0, 1808, 136331265, 0, 1808, 136331265, 0, 1808, 136331265, 0, 2752, 3758096384, 0, 2752, 3758096384, 0, 2752, 3758096384, 0, 2756, 3758096384, 0, 2756, 3758096384, 0, 2756, 3758096384, 0, 2760, 3758096384, 0, 2760, 3758096384, 0, 2760, 3758096384, 0, 2768, 3758096384, 0, 2768, 3758096384, 0, 2768, 3758096384, 0, 2772, 3758096384, 0, 2772, 3758096384, 0, 2772, 3758096384, 0, 2776, 3758096384, 0, 2776, 3758096384, 0, 2776, 3758096384, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390965705367757_633_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390965705367757_633_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dae05c85 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390965705367757_633_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,174 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 27)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (i1 << 4)) 
| (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 312 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1936, 2415919104, 0, 1936, 2415919104, 0, 1952, 2415919104, 0, 1952, 2415919104, 0, 2640, 2415919104, 0, 2640, 2415919104, 0, 2656, 2415919104, 0, 2656, 2415919104, 0, 3264, 2415919104, 0, 3264, 2415919104, 0, 6208, 524304, 0, 6208, 524304, 0, 6224, 524304, 0, 6224, 524304, 0, 7236, 4260880, 0, 7236, 4260880, 0, 7236, 4260880, 0, 7236, 4260880, 0, 7240, 4260880, 0, 7240, 4260880, 0, 7240, 4260880, 0, 7240, 4260880, 0, 7244, 4260880, 0, 7244, 4260880, 0, 7244, 4260880, 0, 
7244, 4260880, 0, 7252, 4260880, 0, 7252, 4260880, 0, 7252, 4260880, 0, 7252, 4260880, 0, 7256, 4260880, 0, 7256, 4260880, 0, 7256, 4260880, 0, 7256, 4260880, 0, 7260, 4260880, 0, 7260, 4260880, 0, 7260, 4260880, 0, 7260, 4260880, 0, 8640, 8192, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1936, 2415919104, 0, 1936, 2415919104, 0, 1952, 2415919104, 0, 1952, 2415919104, 0, 2640, 2415919104, 0, 2640, 2415919104, 0, 2656, 2415919104, 0, 2656, 2415919104, 0, 3264, 2415919104, 0, 3264, 2415919104, 0, 6208, 524304, 0, 6208, 524304, 0, 6224, 524304, 0, 6224, 524304, 0, 7236, 4260880, 0, 7236, 4260880, 0, 7236, 4260880, 0, 7236, 4260880, 0, 7240, 4260880, 0, 7240, 4260880, 0, 7240, 4260880, 0, 7240, 4260880, 0, 7244, 4260880, 0, 7244, 4260880, 0, 7244, 4260880, 0, 7244, 4260880, 0, 7252, 4260880, 0, 7252, 4260880, 0, 7252, 4260880, 0, 7252, 4260880, 0, 7256, 4260880, 0, 7256, 4260880, 0, 7256, 4260880, 0, 7256, 4260880, 0, 7260, 4260880, 0, 7260, 4260880, 0, 7260, 4260880, 0, 7260, 4260880, 0, 8640, 8192, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0, 8960, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390968251569086_634_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390968251569086_634_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5ab6d481 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390968251569086_634_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-word records: [temp] = tag word, [temp + 1] = ballot.x, [temp + 2] = ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] = next free word of _participant_bit; every record advances it by 3 via InterlockedAdd */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 1)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] =
ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2244, 2, 0, 2248, 2, 0, 2260, 2, 0, 2264, 2, 0, 2276, 2, 0, 2280, 2, 0, 3136, 2, 0, 3152, 2, 0, 3168, 2, 0, 2244, 2, 0, 2248, 2, 0, 2260, 2, 0, 2264, 2, 0, 2276, 2, 0, 2280, 2, 0, 3136, 2, 0, 3152, 2, 0, 3168, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756390968484448410_635_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756390968484448410_635_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d4d936a7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756390968484448410_635_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,466 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-word records: [temp] = tag word, [temp + 1] = ballot.x, [temp + 2] = ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] = next free word of _participant_bit; every record advances it by 3 via InterlockedAdd */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 25))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (counter0 << 4)) | (counter1 << 2)); +
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((114 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + } else { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | 
(counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((251 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((270 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((277 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (295 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(333 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (342 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((373 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (377 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (388 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 11)) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((412 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 23)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((422 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((429 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((436 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i7 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (446 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((464 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter8 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1374 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 3758096415, 0, 1088, 3758096415, 0, 1088, 3758096415, 0, 1088, 3758096415, 0, 1088, 3758096415, 0, 1088, 3758096415, 0, 1088, 3758096415, 0, 1088, 3758096415, 0, 3796, 2852126730, 0, 3796, 2852126730, 0, 3796, 2852126730, 0, 3796, 2852126730, 0, 3796, 2852126730, 0, 3796, 2852126730, 0, 3800, 2852126730, 0, 3800, 2852126730, 0, 3800, 2852126730, 0, 3800, 2852126730, 0, 3800, 2852126730, 0, 3800, 2852126730, 0, 3812, 2852126730, 0, 3812, 2852126730, 0, 3812, 2852126730, 0, 3812, 2852126730, 0, 3812, 2852126730, 0, 3812, 2852126730, 0, 3816, 2852126730, 0, 3816, 2852126730, 0, 3816, 2852126730, 0, 3816, 2852126730, 0, 3816, 2852126730, 0, 3816, 2852126730, 0, 3828, 2852126730, 0, 3828, 2852126730, 0, 3828, 2852126730, 0, 3828, 2852126730, 0, 3828, 2852126730, 0, 3828, 2852126730, 0, 3832, 2852126730, 0, 3832, 2852126730, 0, 3832, 2852126730, 0, 3832, 2852126730, 0, 3832, 2852126730, 0, 3832, 2852126730, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 
1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 7952, 67108864, 0, 7968, 67108864, 0, 7984, 67108864, 0, 8592, 73, 0, 8592, 73, 0, 8592, 73, 0, 8608, 73, 0, 8608, 73, 0, 8608, 73, 0, 8624, 73, 0, 8624, 73, 0, 8624, 73, 0, 9168, 1363480657, 0, 9168, 1363480657, 0, 9168, 1363480657, 0, 9168, 1363480657, 0, 9168, 1363480657, 0, 9168, 1363480657, 0, 9168, 1363480657, 0, 9168, 1363480657, 0, 9168, 1363480657, 0, 9168, 1363480657, 0, 9184, 1363480657, 0, 9184, 1363480657, 0, 9184, 1363480657, 0, 9184, 1363480657, 0, 9184, 1363480657, 0, 9184, 1363480657, 0, 9184, 1363480657, 0, 9184, 1363480657, 0, 9184, 1363480657, 0, 9184, 1363480657, 0, 9200, 1363480657, 0, 9200, 1363480657, 0, 9200, 1363480657, 0, 9200, 1363480657, 0, 9200, 1363480657, 0, 9200, 1363480657, 0, 9200, 1363480657, 0, 9200, 1363480657, 0, 9200, 1363480657, 0, 9200, 1363480657, 0, 9488, 613566756, 0, 9488, 613566756, 0, 9488, 613566756, 0, 9488, 613566756, 0, 9488, 613566756, 0, 9488, 613566756, 0, 9488, 613566756, 0, 9488, 613566756, 0, 9488, 613566756, 0, 9488, 613566756, 0, 9504, 613566756, 0, 9504, 613566756, 0, 9504, 613566756, 0, 9504, 613566756, 0, 9504, 613566756, 0, 9504, 613566756, 0, 9504, 613566756, 0, 9504, 613566756, 0, 9504, 613566756, 0, 9504, 613566756, 0, 9520, 613566756, 0, 
9520, 613566756, 0, 9520, 613566756, 0, 9520, 613566756, 0, 9520, 613566756, 0, 9520, 613566756, 0, 9520, 613566756, 0, 9520, 613566756, 0, 9520, 613566756, 0, 9520, 613566756, 0, 10192, 536870912, 0, 10208, 536870912, 0, 10224, 536870912, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 11392, 17, 0, 11392, 17, 0, 14528, 2, 0, 14544, 2, 0, 14560, 2, 0, 18880, 1073741824, 0, 24832, 1073741824, 0, 26368, 4, 0, 26384, 4, 0, 27904, 4194304, 0, 27920, 4194304, 0, 28544, 4, 0, 29712, 2281701376, 0, 29712, 2281701376, 0, 1088, 3758096415, 0, 1088, 3758096415, 0, 1088, 3758096415, 0, 1088, 3758096415, 0, 1088, 3758096415, 0, 1088, 3758096415, 0, 1088, 3758096415, 0, 1088, 3758096415, 0, 3796, 2852126730, 0, 3796, 2852126730, 0, 3796, 2852126730, 0, 3796, 2852126730, 0, 3796, 2852126730, 0, 3796, 2852126730, 0, 3800, 2852126730, 0, 3800, 2852126730, 0, 3800, 2852126730, 0, 3800, 2852126730, 0, 3800, 2852126730, 0, 3800, 2852126730, 0, 3812, 2852126730, 0, 3812, 2852126730, 0, 3812, 
2852126730, 0, 3812, 2852126730, 0, 3812, 2852126730, 0, 3812, 2852126730, 0, 3816, 2852126730, 0, 3816, 2852126730, 0, 3816, 2852126730, 0, 3816, 2852126730, 0, 3816, 2852126730, 0, 3816, 2852126730, 0, 3828, 2852126730, 0, 3828, 2852126730, 0, 3828, 2852126730, 0, 3828, 2852126730, 0, 3828, 2852126730, 0, 3828, 2852126730, 0, 3832, 2852126730, 0, 3832, 2852126730, 0, 3832, 2852126730, 0, 3832, 2852126730, 0, 3832, 2852126730, 0, 3832, 2852126730, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4816, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4832, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 4848, 1431655765, 0, 7952, 67108864, 0, 7968, 67108864, 0, 7984, 67108864, 0, 8592, 73, 0, 8592, 73, 0, 8592, 73, 0, 8608, 73, 0, 8608, 73, 0, 8608, 73, 0, 8624, 73, 0, 8624, 73, 0, 8624, 73, 0, 9168, 1363480657, 0, 9168, 1363480657, 0, 9168, 1363480657, 0, 9168, 1363480657, 0, 9168, 1363480657, 0, 9168, 1363480657, 0, 9168, 1363480657, 0, 9168, 1363480657, 0, 9168, 1363480657, 0, 9168, 1363480657, 0, 9184, 1363480657, 0, 9184, 1363480657, 0, 9184, 1363480657, 0, 9184, 1363480657, 0, 9184, 1363480657, 0, 9184, 1363480657, 0, 9184, 1363480657, 0, 
9184, 1363480657, 0, 9184, 1363480657, 0, 9184, 1363480657, 0, 9200, 1363480657, 0, 9200, 1363480657, 0, 9200, 1363480657, 0, 9200, 1363480657, 0, 9200, 1363480657, 0, 9200, 1363480657, 0, 9200, 1363480657, 0, 9200, 1363480657, 0, 9200, 1363480657, 0, 9200, 1363480657, 0, 9488, 613566756, 0, 9488, 613566756, 0, 9488, 613566756, 0, 9488, 613566756, 0, 9488, 613566756, 0, 9488, 613566756, 0, 9488, 613566756, 0, 9488, 613566756, 0, 9488, 613566756, 0, 9488, 613566756, 0, 9504, 613566756, 0, 9504, 613566756, 0, 9504, 613566756, 0, 9504, 613566756, 0, 9504, 613566756, 0, 9504, 613566756, 0, 9504, 613566756, 0, 9504, 613566756, 0, 9504, 613566756, 0, 9504, 613566756, 0, 9520, 613566756, 0, 9520, 613566756, 0, 9520, 613566756, 0, 9520, 613566756, 0, 9520, 613566756, 0, 9520, 613566756, 0, 9520, 613566756, 0, 9520, 613566756, 0, 9520, 613566756, 0, 9520, 613566756, 0, 10192, 536870912, 0, 10208, 536870912, 0, 10224, 536870912, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10768, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10784, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 10800, 2863311530, 0, 11392, 
17, 0, 11392, 17, 0, 14528, 2, 0, 14544, 2, 0, 14560, 2, 0, 18880, 1073741824, 0, 24832, 1073741824, 0, 26368, 4, 0, 26384, 4, 0, 27904, 4194304, 0, 27920, 4194304, 0, 28544, 4, 0, 29712, 2281701376, 0, 29712, 2281701376, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756391064727877210_637_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756391064727877210_637_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ae99ff6e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756391064727877210_637_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,136 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() 
>= 27))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3536, 4, 0, 3552, 4, 0, 7168, 4, 0, 3536, 4, 0, 3552, 4, 0, 7168, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756391064956265145_638_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756391064956265145_638_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..48629ece --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756391064956265145_638_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,342 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 17))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((69 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((82 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 21)) { + if 
((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((297 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (331 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (335 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + 
+#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 294 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4420, 16, 0, 4424, 16, 0, 4436, 16, 0, 4440, 16, 0, 4452, 16, 0, 4456, 16, 0, 5252, 16, 0, 5256, 16, 0, 5268, 16, 0, 5272, 16, 0, 5284, 16, 0, 5288, 16, 0, 9344, 17, 0, 9344, 17, 0, 13056, 1145324612, 0, 13056, 1145324612, 0, 13056, 1145324612, 0, 13056, 1145324612, 0, 13056, 1145324612, 0, 13056, 1145324612, 0, 13056, 1145324612, 0, 13056, 1145324612, 0, 14976, 8, 0, 15872, 8390656, 0, 15872, 8390656, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 19024, 16794688, 0, 19024, 16794688, 0, 19024, 16794688, 0, 19024, 16794688, 0, 19040, 16794688, 0, 19040, 16794688, 0, 19040, 16794688, 0, 19040, 16794688, 0, 4420, 16, 0, 4424, 16, 0, 4436, 16, 0, 4440, 16, 0, 4452, 16, 0, 4456, 16, 0, 5252, 16, 0, 5256, 16, 0, 5268, 16, 0, 5272, 16, 0, 5284, 16, 0, 5288, 16, 0, 9344, 17, 0, 9344, 17, 0, 13056, 1145324612, 0, 13056, 1145324612, 0, 13056, 1145324612, 0, 13056, 1145324612, 0, 13056, 1145324612, 0, 13056, 1145324612, 0, 13056, 1145324612, 0, 13056, 1145324612, 0, 14976, 8, 0, 15872, 8390656, 0, 15872, 8390656, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 17216, 1431655765, 0, 19024, 16794688, 0, 19024, 
16794688, 0, 19024, 16794688, 0, 19024, 16794688, 0, 19040, 16794688, 0, 19040, 16794688, 0, 19040, 16794688, 0, 19040, 16794688, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756391089831077501_639_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756391089831077501_639_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..32a0738d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756391089831077501_639_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,185 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 372 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2880, 1073758208, 0, 2880, 1073758208, 0, 2896, 1073758208, 0, 2896, 1073758208, 0, 4096, 1145324612, 0, 4096, 1145324612, 0, 4096, 1145324612, 0, 4096, 1145324612, 0, 4096, 1145324612, 0, 4096, 1145324612, 0, 4096, 1145324612, 0, 4096, 1145324612, 0, 4100, 1145324612, 0, 4100, 1145324612, 0, 4100, 1145324612, 0, 4100, 1145324612, 0, 4100, 1145324612, 0, 4100, 1145324612, 0, 4100, 1145324612, 0, 4100, 1145324612, 0, 4104, 1145324612, 0, 4104, 
1145324612, 0, 4104, 1145324612, 0, 4104, 1145324612, 0, 4104, 1145324612, 0, 4104, 1145324612, 0, 4104, 1145324612, 0, 4104, 1145324612, 0, 4112, 1145324612, 0, 4112, 1145324612, 0, 4112, 1145324612, 0, 4112, 1145324612, 0, 4112, 1145324612, 0, 4112, 1145324612, 0, 4112, 1145324612, 0, 4112, 1145324612, 0, 4116, 1145324612, 0, 4116, 1145324612, 0, 4116, 1145324612, 0, 4116, 1145324612, 0, 4116, 1145324612, 0, 4116, 1145324612, 0, 4116, 1145324612, 0, 4116, 1145324612, 0, 4120, 1145324612, 0, 4120, 1145324612, 0, 4120, 1145324612, 0, 4120, 1145324612, 0, 4120, 1145324612, 0, 4120, 1145324612, 0, 4120, 1145324612, 0, 4120, 1145324612, 0, 6272, 134217728, 0, 6288, 134217728, 0, 6304, 134217728, 0, 6720, 8, 0, 6736, 8, 0, 6752, 8, 0, 7808, 8390656, 0, 7808, 8390656, 0, 576, 17, 0, 576, 17, 0, 2880, 1073758208, 0, 2880, 1073758208, 0, 2896, 1073758208, 0, 2896, 1073758208, 0, 4096, 1145324612, 0, 4096, 1145324612, 0, 4096, 1145324612, 0, 4096, 1145324612, 0, 4096, 1145324612, 0, 4096, 1145324612, 0, 4096, 1145324612, 0, 4096, 1145324612, 0, 4100, 1145324612, 0, 4100, 1145324612, 0, 4100, 1145324612, 0, 4100, 1145324612, 0, 4100, 1145324612, 0, 4100, 1145324612, 0, 4100, 1145324612, 0, 4100, 1145324612, 0, 4104, 1145324612, 0, 4104, 1145324612, 0, 4104, 1145324612, 0, 4104, 1145324612, 0, 4104, 1145324612, 0, 4104, 1145324612, 0, 4104, 1145324612, 0, 4104, 1145324612, 0, 4112, 1145324612, 0, 4112, 1145324612, 0, 4112, 1145324612, 0, 4112, 1145324612, 0, 4112, 1145324612, 0, 4112, 1145324612, 0, 4112, 1145324612, 0, 4112, 1145324612, 0, 4116, 1145324612, 0, 4116, 1145324612, 0, 4116, 1145324612, 0, 4116, 1145324612, 0, 4116, 1145324612, 0, 4116, 1145324612, 0, 4116, 1145324612, 0, 4116, 1145324612, 0, 4120, 1145324612, 0, 4120, 1145324612, 0, 4120, 1145324612, 0, 4120, 1145324612, 0, 4120, 1145324612, 0, 4120, 1145324612, 0, 4120, 1145324612, 0, 4120, 1145324612, 0, 6272, 134217728, 0, 6288, 134217728, 0, 6304, 134217728, 0, 6720, 8, 0, 6736, 8, 0, 6752, 8, 0, 7808, 
8390656, 0, 7808, 8390656, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756391467729858880_642_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756391467729858880_642_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..41d22d78 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756391467729858880_642_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,211 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 24))) { + if 
(((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 21)) { + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 16)) { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 30)) { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6656, 2453667840, 0, 6656, 2453667840, 0, 6656, 2453667840, 0, 6656, 2453667840, 0, 10624, 2449473536, 0, 10624, 2449473536, 0, 10624, 2449473536, 0, 11392, 292, 0, 11392, 292, 0, 11392, 292, 0, 12608, 36, 0, 12608, 36, 0, 14208, 536870912, 0, 6656, 2453667840, 0, 6656, 2453667840, 0, 6656, 2453667840, 0, 6656, 2453667840, 0, 10624, 2449473536, 0, 10624, 2449473536, 0, 10624, 2449473536, 0, 11392, 292, 0, 11392, 292, 0, 11392, 292, 0, 12608, 36, 0, 12608, 36, 0, 14208, 536870912, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756391468388696195_643_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756391468388696195_643_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..32af24dd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756391468388696195_643_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,139 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 73, 0, 768, 73, 0, 768, 73, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5760, 
613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 768, 73, 0, 768, 73, 0, 768, 73, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5440, 272696336, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0, 5760, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756391468719017166_644_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756391468719017166_644_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..592afd31 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756391468719017166_644_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,345 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 16))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((118 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((129 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 24))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((218 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((235 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((244 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((255 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((271 << 6) | (i4 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((280 << 6) | 
(i4 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter6 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((290 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((299 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (308 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((326 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((341 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((352 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((363 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 864 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2304, 65536, 0, 8260, 1145324612, 0, 8260, 1145324612, 0, 8260, 1145324612, 0, 8260, 1145324612, 0, 8260, 1145324612, 0, 8260, 1145324612, 0, 8260, 1145324612, 0, 8260, 1145324612, 0, 8276, 1145324612, 0, 8276, 1145324612, 0, 8276, 1145324612, 0, 8276, 1145324612, 0, 8276, 1145324612, 0, 8276, 1145324612, 0, 8276, 1145324612, 0, 8276, 1145324612, 0, 8292, 1145324612, 0, 8292, 1145324612, 0, 8292, 1145324612, 0, 8292, 1145324612, 0, 8292, 1145324612, 0, 8292, 1145324612, 0, 8292, 1145324612, 0, 8292, 1145324612, 0, 13952, 1073741825, 0, 13952, 1073741825, 0, 13968, 1073741825, 0, 13968, 1073741825, 0, 13984, 1073741825, 0, 13984, 1073741825, 0, 15040, 1073741825, 0, 15040, 1073741825, 0, 15044, 1073741825, 0, 15044, 1073741825, 0, 15048, 1073741825, 0, 15048, 1073741825, 0, 15056, 1073741825, 0, 15056, 1073741825, 0, 15060, 1073741825, 0, 15060, 1073741825, 0, 15064, 1073741825, 0, 15064, 1073741825, 0, 15072, 
1073741825, 0, 15072, 1073741825, 0, 15076, 1073741825, 0, 15076, 1073741825, 0, 15080, 1073741825, 0, 15080, 1073741825, 0, 16320, 1073741825, 0, 16320, 1073741825, 0, 16336, 1073741825, 0, 16336, 1073741825, 0, 16352, 1073741825, 0, 16352, 1073741825, 0, 17348, 17043520, 0, 17348, 17043520, 0, 17348, 17043520, 0, 17348, 17043520, 0, 17352, 17043520, 0, 17352, 17043520, 0, 17352, 17043520, 0, 17352, 17043520, 0, 17364, 17043520, 0, 17364, 17043520, 0, 17364, 17043520, 0, 17364, 17043520, 0, 17368, 17043520, 0, 17368, 17043520, 0, 17368, 17043520, 0, 17368, 17043520, 0, 17380, 17043520, 0, 17380, 17043520, 0, 17380, 17043520, 0, 17380, 17043520, 0, 17384, 17043520, 0, 17384, 17043520, 0, 17384, 17043520, 0, 17384, 17043520, 0, 17924, 136348168, 0, 17924, 136348168, 0, 17924, 136348168, 0, 17924, 136348168, 0, 17924, 136348168, 0, 17928, 136348168, 0, 17928, 136348168, 0, 17928, 136348168, 0, 17928, 136348168, 0, 17928, 136348168, 0, 17940, 136348168, 0, 17940, 136348168, 0, 17940, 136348168, 0, 17940, 136348168, 0, 17940, 136348168, 0, 17944, 136348168, 0, 17944, 136348168, 0, 17944, 136348168, 0, 17944, 136348168, 0, 17944, 136348168, 0, 17956, 136348168, 0, 17956, 136348168, 0, 17956, 136348168, 0, 17956, 136348168, 0, 17956, 136348168, 0, 17960, 136348168, 0, 17960, 136348168, 0, 17960, 136348168, 0, 17960, 136348168, 0, 17960, 136348168, 0, 19136, 17043520, 0, 19136, 17043520, 0, 19136, 17043520, 0, 19136, 17043520, 0, 19136, 1073741825, 0, 19136, 1073741825, 0, 19152, 17043520, 0, 19152, 17043520, 0, 19152, 17043520, 0, 19152, 17043520, 0, 19152, 1073741825, 0, 19152, 1073741825, 0, 19168, 17043520, 0, 19168, 17043520, 0, 19168, 17043520, 0, 19168, 17043520, 0, 19168, 1073741825, 0, 19168, 1073741825, 0, 19712, 272696336, 0, 19712, 272696336, 0, 19712, 272696336, 0, 19712, 272696336, 0, 19712, 272696336, 0, 21824, 1179680, 0, 21824, 1179680, 0, 21824, 1179680, 0, 21840, 1179680, 0, 21840, 1179680, 0, 21840, 1179680, 0, 22528, 8388608, 0, 22544, 8388608, 0, 
23232, 36, 0, 23232, 36, 0, 23248, 36, 0, 23248, 36, 0, 2304, 65536, 0, 8260, 1145324612, 0, 8260, 1145324612, 0, 8260, 1145324612, 0, 8260, 1145324612, 0, 8260, 1145324612, 0, 8260, 1145324612, 0, 8260, 1145324612, 0, 8260, 1145324612, 0, 8276, 1145324612, 0, 8276, 1145324612, 0, 8276, 1145324612, 0, 8276, 1145324612, 0, 8276, 1145324612, 0, 8276, 1145324612, 0, 8276, 1145324612, 0, 8276, 1145324612, 0, 8292, 1145324612, 0, 8292, 1145324612, 0, 8292, 1145324612, 0, 8292, 1145324612, 0, 8292, 1145324612, 0, 8292, 1145324612, 0, 8292, 1145324612, 0, 8292, 1145324612, 0, 13952, 1073741825, 0, 13952, 1073741825, 0, 13968, 1073741825, 0, 13968, 1073741825, 0, 13984, 1073741825, 0, 13984, 1073741825, 0, 15040, 1073741825, 0, 15040, 1073741825, 0, 15044, 1073741825, 0, 15044, 1073741825, 0, 15048, 1073741825, 0, 15048, 1073741825, 0, 15056, 1073741825, 0, 15056, 1073741825, 0, 15060, 1073741825, 0, 15060, 1073741825, 0, 15064, 1073741825, 0, 15064, 1073741825, 0, 15072, 1073741825, 0, 15072, 1073741825, 0, 15076, 1073741825, 0, 15076, 1073741825, 0, 15080, 1073741825, 0, 15080, 1073741825, 0, 16320, 1073741825, 0, 16320, 1073741825, 0, 16336, 1073741825, 0, 16336, 1073741825, 0, 16352, 1073741825, 0, 16352, 1073741825, 0, 17348, 17043520, 0, 17348, 17043520, 0, 17348, 17043520, 0, 17348, 17043520, 0, 17352, 17043520, 0, 17352, 17043520, 0, 17352, 17043520, 0, 17352, 17043520, 0, 17364, 17043520, 0, 17364, 17043520, 0, 17364, 17043520, 0, 17364, 17043520, 0, 17368, 17043520, 0, 17368, 17043520, 0, 17368, 17043520, 0, 17368, 17043520, 0, 17380, 17043520, 0, 17380, 17043520, 0, 17380, 17043520, 0, 17380, 17043520, 0, 17384, 17043520, 0, 17384, 17043520, 0, 17384, 17043520, 0, 17384, 17043520, 0, 17924, 136348168, 0, 17924, 136348168, 0, 17924, 136348168, 0, 17924, 136348168, 0, 17924, 136348168, 0, 17928, 136348168, 0, 17928, 136348168, 0, 17928, 136348168, 0, 17928, 136348168, 0, 17928, 136348168, 0, 17940, 136348168, 0, 17940, 136348168, 0, 17940, 136348168, 0, 17940, 
136348168, 0, 17940, 136348168, 0, 17944, 136348168, 0, 17944, 136348168, 0, 17944, 136348168, 0, 17944, 136348168, 0, 17944, 136348168, 0, 17956, 136348168, 0, 17956, 136348168, 0, 17956, 136348168, 0, 17956, 136348168, 0, 17956, 136348168, 0, 17960, 136348168, 0, 17960, 136348168, 0, 17960, 136348168, 0, 17960, 136348168, 0, 17960, 136348168, 0, 19136, 17043520, 0, 19136, 17043520, 0, 19136, 17043520, 0, 19136, 17043520, 0, 19136, 1073741825, 0, 19136, 1073741825, 0, 19152, 17043520, 0, 19152, 17043520, 0, 19152, 17043520, 0, 19152, 17043520, 0, 19152, 1073741825, 0, 19152, 1073741825, 0, 19168, 17043520, 0, 19168, 17043520, 0, 19168, 17043520, 0, 19168, 17043520, 0, 19168, 1073741825, 0, 19168, 1073741825, 0, 19712, 272696336, 0, 19712, 272696336, 0, 19712, 272696336, 0, 19712, 272696336, 0, 19712, 272696336, 0, 21824, 1179680, 0, 21824, 1179680, 0, 21824, 1179680, 0, 21840, 1179680, 0, 21840, 1179680, 0, 21840, 1179680, 0, 22528, 8388608, 0, 22544, 8388608, 0, 23232, 36, 0, 23232, 36, 0, 23248, 36, 0, 23248, 36, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756391572680317955_645_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756391572680317955_645_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fc819bb2 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756391572680317955_645_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,376 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 15)) { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 27)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 22))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((223 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((233 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((242 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((246 << 6) | (counter1 << 4)) | 
(i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((253 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((260 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + case 3: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((299 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((313 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((322 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 2)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((348 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (352 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((370 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 11216, 536870912, 0, 11232, 536870912, 0, 11248, 536870912, 0, 12416, 2097152, 0, 14928, 68, 0, 14928, 68, 0, 14932, 68, 0, 14932, 68, 0, 14944, 68, 0, 14944, 68, 0, 14948, 
68, 0, 14948, 68, 0, 22288, 32768, 0, 22304, 32768, 0, 22320, 32768, 0, 23696, 4261412865, 0, 23696, 4261412865, 0, 23696, 4261412865, 0, 23696, 4261412865, 0, 23696, 4261412865, 0, 23696, 4261412865, 0, 23696, 4261412865, 0, 23696, 4261412865, 0, 23712, 4261412865, 0, 23712, 4261412865, 0, 23712, 4261412865, 0, 23712, 4261412865, 0, 23712, 4261412865, 0, 23712, 4261412865, 0, 23712, 4261412865, 0, 23712, 4261412865, 0, 23728, 4261412865, 0, 23728, 4261412865, 0, 23728, 4261412865, 0, 23728, 4261412865, 0, 23728, 4261412865, 0, 23728, 4261412865, 0, 23728, 4261412865, 0, 23728, 4261412865, 0, 576, 17, 0, 576, 17, 0, 11216, 536870912, 0, 11232, 536870912, 0, 11248, 536870912, 0, 12416, 2097152, 0, 14928, 68, 0, 14928, 68, 0, 14932, 68, 0, 14932, 68, 0, 14944, 68, 0, 14944, 68, 0, 14948, 68, 0, 14948, 68, 0, 22288, 32768, 0, 22304, 32768, 0, 22320, 32768, 0, 23696, 4261412865, 0, 23696, 4261412865, 0, 23696, 4261412865, 0, 23696, 4261412865, 0, 23696, 4261412865, 0, 23696, 4261412865, 0, 23696, 4261412865, 0, 23696, 4261412865, 0, 23712, 4261412865, 0, 23712, 4261412865, 0, 23712, 4261412865, 0, 23712, 4261412865, 0, 23712, 4261412865, 0, 23712, 4261412865, 0, 23712, 4261412865, 0, 23712, 4261412865, 0, 23728, 4261412865, 0, 23728, 4261412865, 0, 23728, 4261412865, 0, 23728, 4261412865, 0, 23728, 4261412865, 0, 23728, 4261412865, 0, 23728, 4261412865, 0, 23728, 4261412865, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756391652931456800_647_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756391652931456800_647_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..48c90e2d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756391652931456800_647_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,364 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 20)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 16)) { + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 17))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 31))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((207 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((214 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((253 << 6) | (i1 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((262 << 6) | (i1 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 2) || 
(WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((279 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (330 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (334 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 372 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 10432, 2048, 0, 10448, 2048, 0, 10464, 2048, 0, 16192, 545261600, 0, 16192, 545261600, 0, 16192, 545261600, 0, 16192, 545261600, 0, 16196, 545261600, 0, 16196, 545261600, 0, 16196, 545261600, 0, 16196, 545261600, 0, 16208, 545261600, 0, 16208, 545261600, 0, 16208, 545261600, 0, 16208, 545261600, 0, 16212, 545261600, 0, 16212, 545261600, 0, 16212, 545261600, 0, 16212, 545261600, 0, 16224, 545261600, 0, 16224, 545261600, 0, 16224, 545261600, 0, 16224, 545261600, 0, 16228, 545261600, 0, 16228, 545261600, 0, 16228, 545261600, 0, 16228, 545261600, 0, 16768, 545261600, 0, 16768, 545261600, 0, 16768, 545261600, 0, 16768, 545261600, 0, 16772, 545261600, 0, 16772, 545261600, 0, 16772, 545261600, 0, 16772, 545261600, 0, 16784, 545261600, 0, 16784, 545261600, 0, 16784, 545261600, 0, 16784, 545261600, 0, 16788, 545261600, 0, 16788, 545261600, 0, 16788, 545261600, 0, 16788, 545261600, 0, 16800, 
545261600, 0, 16800, 545261600, 0, 16800, 545261600, 0, 16800, 545261600, 0, 16804, 545261600, 0, 16804, 545261600, 0, 16804, 545261600, 0, 16804, 545261600, 0, 17856, 4, 0, 17872, 4, 0, 17888, 4, 0, 18496, 85, 0, 18496, 85, 0, 18496, 85, 0, 18496, 85, 0, 21120, 8, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 10432, 2048, 0, 10448, 2048, 0, 10464, 2048, 0, 16192, 545261600, 0, 16192, 545261600, 0, 16192, 545261600, 0, 16192, 545261600, 0, 16196, 545261600, 0, 16196, 545261600, 0, 16196, 545261600, 0, 16196, 545261600, 0, 16208, 545261600, 0, 16208, 545261600, 0, 16208, 545261600, 0, 16208, 545261600, 0, 16212, 545261600, 0, 16212, 545261600, 0, 16212, 545261600, 0, 16212, 545261600, 0, 16224, 545261600, 0, 16224, 545261600, 0, 16224, 545261600, 0, 16224, 545261600, 0, 16228, 545261600, 0, 16228, 545261600, 0, 16228, 545261600, 0, 16228, 545261600, 0, 16768, 545261600, 0, 16768, 545261600, 0, 16768, 545261600, 0, 16768, 545261600, 0, 16772, 545261600, 0, 16772, 545261600, 0, 16772, 545261600, 0, 16772, 545261600, 0, 16784, 545261600, 0, 16784, 545261600, 0, 16784, 545261600, 0, 16784, 545261600, 0, 16788, 545261600, 0, 16788, 545261600, 0, 16788, 545261600, 0, 16788, 545261600, 0, 16800, 545261600, 0, 16800, 545261600, 0, 16800, 545261600, 0, 16800, 545261600, 0, 16804, 545261600, 0, 16804, 545261600, 0, 16804, 545261600, 0, 16804, 545261600, 0, 17856, 4, 0, 17872, 4, 0, 17888, 4, 0, 18496, 85, 0, 18496, 85, 0, 18496, 85, 0, 18496, 85, 0, 21120, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392344173500754_650_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392344173500754_650_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ed4e7c57 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392344173500754_650_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,74 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 896, 65535, 0, 
896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 896, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0, 912, 65535, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392344836462856_651_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392344836462856_651_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dfebc666 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392344836462856_651_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,146 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 28))) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 20)) || 
(WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28))) { + uint counter2 = 0; + while ((counter2 < 2)) { + 
counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((117 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0, 4864, 1363481681, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index 
+ Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392368006579287_653_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392368006579287_653_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9b6a25dd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392368006579287_653_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,68 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1664, 17563649, 0, 1664, 17563649, 0, 1664, 17563649, 0, 1664, 17563649, 0, 1680, 17563649, 0, 1680, 17563649, 0, 1680, 17563649, 0, 1680, 17563649, 0, 1696, 17563649, 0, 1696, 17563649, 0, 1696, 17563649, 0, 1696, 17563649, 0, 1664, 17563649, 0, 1664, 17563649, 0, 1664, 17563649, 0, 1664, 17563649, 0, 1680, 17563649, 0, 1680, 17563649, 0, 1680, 17563649, 0, 1680, 
17563649, 0, 1696, 17563649, 0, 1696, 17563649, 0, 1696, 17563649, 0, 1696, 17563649, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392368357572609_654_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392368357572609_654_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9fdd200c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392368357572609_654_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,222 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 4))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if ((WaveGetLaneIndex() >= 22)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1728, 2684354570, 0, 1728, 2684354570, 0, 1728, 2684354570, 0, 1728, 2684354570, 0, 4864, 2684354602, 0, 4864, 2684354602, 0, 4864, 2684354602, 0, 4864, 2684354602, 0, 4864, 2684354602, 0, 6464, 178956288, 0, 6464, 178956288, 0, 6464, 178956288, 0, 6464, 178956288, 0, 6464, 178956288, 0, 6464, 178956288, 0, 6464, 178956288, 0, 6464, 178956288, 0, 6464, 178956288, 0, 7168, 2097152, 0, 9728, 2147483648, 0, 9744, 2147483648, 0, 9760, 2147483648, 0, 11712, 2860515328, 0, 11712, 2860515328, 0, 11712, 2860515328, 0, 11712, 2860515328, 0, 11712, 2860515328, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1728, 2684354570, 0, 1728, 2684354570, 0, 1728, 2684354570, 0, 1728, 2684354570, 0, 4864, 2684354602, 0, 4864, 2684354602, 0, 4864, 2684354602, 0, 4864, 2684354602, 0, 4864, 2684354602, 0, 6464, 178956288, 0, 6464, 178956288, 0, 6464, 178956288, 0, 6464, 178956288, 0, 6464, 178956288, 0, 6464, 178956288, 0, 6464, 178956288, 0, 6464, 178956288, 0, 6464, 178956288, 0, 7168, 2097152, 0, 9728, 2147483648, 
0, 9744, 2147483648, 0, 9760, 2147483648, 0, 11712, 2860515328, 0, 11712, 2860515328, 0, 11712, 2860515328, 0, 11712, 2860515328, 0, 11712, 2860515328, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392369093259021_655_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392369093259021_655_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..437e9cc7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392369093259021_655_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,127 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27))) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((68 << 6) | (i0 << 4)) | (i1 << 2)) | counter2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((87 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 131072, 0, 912, 131072, 0, 4353, 4, 0, 4354, 4, 0, 4357, 4, 0, 4358, 4, 0, 4361, 4, 0, 4362, 4, 0, 4369, 4, 0, 4370, 4, 0, 4373, 4, 0, 4374, 4, 0, 4377, 4, 0, 4378, 4, 0, 7616, 2097152, 0, 7632, 2097152, 0, 896, 131072, 0, 912, 131072, 0, 4353, 4, 0, 4354, 4, 0, 4357, 4, 0, 4358, 4, 0, 4361, 4, 0, 4362, 4, 0, 4369, 4, 0, 4370, 4, 0, 4373, 4, 0, 4374, 4, 0, 4377, 4, 0, 4378, 4, 0, 7616, 2097152, 0, 7632, 2097152, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392371532588768_656_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392371532588768_656_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..37808548 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392371532588768_656_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 22)) { + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [704, 4290772992, 0, 704, 4290772992, 0, 704, 4290772992, 0, 704, 4290772992, 0, 704, 4290772992, 0, 704, 4290772992, 0, 704, 4290772992, 0, 704, 4290772992, 0, 704, 4290772992, 0, 704, 4290772992, 0, 1872, 2860515328, 0, 1872, 2860515328, 0, 1872, 2860515328, 0, 1872, 2860515328, 0, 1872, 2860515328, 0, 1888, 2860515328, 0, 1888, 2860515328, 0, 1888, 2860515328, 0, 1888, 2860515328, 0, 1888, 2860515328, 0, 3344, 4194304, 0, 3348, 4194304, 0, 3352, 4194304, 0, 3360, 4194304, 0, 3364, 4194304, 0, 3368, 4194304, 0, 704, 4290772992, 0, 704, 4290772992, 0, 704, 4290772992, 0, 704, 4290772992, 0, 704, 4290772992, 0, 704, 4290772992, 0, 704, 4290772992, 0, 704, 4290772992, 0, 704, 4290772992, 0, 704, 4290772992, 0, 1872, 2860515328, 0, 1872, 2860515328, 0, 1872, 2860515328, 0, 1872, 2860515328, 0, 1872, 2860515328, 0, 1888, 2860515328, 0, 1888, 2860515328, 0, 1888, 2860515328, 0, 1888, 2860515328, 0, 1888, 2860515328, 0, 3344, 4194304, 0, 3348, 4194304, 0, 3352, 4194304, 0, 3360, 4194304, 0, 3364, 4194304, 0, 3368, 4194304, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392372703777092_657_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392372703777092_657_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3202585c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392372703777092_657_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,135 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 25))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if 
((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2816, 536870914, 0, 2816, 536870914, 0, 2832, 536870914, 0, 2832, 536870914, 0, 5184, 2147483656, 0, 5184, 2147483656, 0, 5188, 2147483656, 0, 5188, 2147483656, 0, 5200, 2147483656, 0, 5200, 2147483656, 0, 5204, 2147483656, 0, 5204, 2147483656, 0, 5632, 8, 0, 5648, 8, 0, 576, 17, 0, 576, 17, 0, 2816, 536870914, 0, 2816, 536870914, 0, 2832, 536870914, 0, 2832, 536870914, 0, 5184, 2147483656, 0, 5184, 2147483656, 0, 5188, 2147483656, 0, 5188, 2147483656, 0, 5200, 2147483656, 0, 5200, 
2147483656, 0, 5204, 2147483656, 0, 5204, 2147483656, 0, 5632, 8, 0, 5648, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392373607005319_658_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392373607005319_658_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d209e3cf --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392373607005319_658_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,159 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 22)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 25)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3392, 2449473536, 0, 3392, 2449473536, 0, 3392, 2449473536, 0, 3408, 2449473536, 0, 3408, 2449473536, 0, 3408, 2449473536, 0, 5248, 8388672, 0, 5248, 8388672, 0, 5264, 8388672, 0, 5264, 8388672, 0, 5952, 4198400, 0, 5952, 4198400, 0, 5968, 4198400, 0, 5968, 4198400, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3392, 2449473536, 0, 3392, 2449473536, 0, 3392, 2449473536, 0, 3408, 2449473536, 0, 3408, 2449473536, 0, 3408, 2449473536, 0, 5248, 8388672, 0, 5248, 8388672, 0, 5264, 8388672, 0, 5264, 8388672, 0, 5952, 4198400, 0, 5952, 4198400, 0, 5968, 4198400, 0, 5968, 4198400, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392375849404239_659_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392375849404239_659_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..713308fa --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392375849404239_659_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + 
Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 2688, 1342177285, 0, 2688, 1342177285, 0, 2688, 1342177285, 0, 2688, 1342177285, 0, 2304, 67108864, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 3072, 2863311530, 0, 2688, 1342177285, 0, 2688, 1342177285, 0, 2688, 1342177285, 0, 2688, 1342177285, 0, 2304, 67108864, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392376046217698_660_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392376046217698_660_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..105b6375 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392376046217698_660_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,177 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 19)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ if ((i1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5120, 85, 0, 5120, 85, 0, 5120, 85, 0, 5120, 85, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 6336, 85, 0, 6336, 85, 0, 6336, 85, 0, 6336, 85, 0, 5120, 85, 0, 5120, 85, 0, 5120, 85, 0, 5120, 85, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 5696, 1431655765, 0, 6336, 85, 0, 6336, 85, 0, 6336, 85, 0, 6336, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392396503045544_663_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392396503045544_663_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7c0de59d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392396503045544_663_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,305 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((67 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 
13)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 27))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 27))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (290 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (303 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7040, 286331153, 0, 7040, 286331153, 0, 7040, 286331153, 0, 7040, 286331153, 0, 7040, 286331153, 0, 7040, 286331153, 0, 7040, 286331153, 0, 7040, 286331153, 0, 7056, 286331153, 0, 7056, 286331153, 0, 7056, 286331153, 0, 7056, 286331153, 0, 7056, 286331153, 0, 7056, 286331153, 0, 7056, 286331153, 0, 7056, 286331153, 0, 7072, 286331153, 0, 7072, 286331153, 0, 7072, 286331153, 0, 7072, 286331153, 0, 7072, 286331153, 0, 7072, 286331153, 0, 7072, 286331153, 0, 7072, 286331153, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 
0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 10240, 134217728, 0, 10256, 134217728, 0, 10688, 32776, 0, 10688, 32776, 0, 10704, 32776, 0, 10704, 32776, 0, 15616, 524288, 0, 19136, 8390656, 0, 19136, 8390656, 0, 7040, 286331153, 0, 7040, 286331153, 0, 7040, 286331153, 0, 7040, 286331153, 0, 7040, 286331153, 0, 7040, 286331153, 0, 7040, 286331153, 0, 7040, 286331153, 0, 7056, 286331153, 0, 7056, 286331153, 0, 7056, 286331153, 0, 7056, 286331153, 0, 7056, 286331153, 0, 7056, 286331153, 0, 7056, 286331153, 0, 7056, 286331153, 0, 7072, 286331153, 0, 7072, 286331153, 0, 7072, 286331153, 0, 7072, 286331153, 0, 7072, 286331153, 0, 7072, 286331153, 0, 7072, 286331153, 0, 7072, 286331153, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 7936, 1145324612, 0, 10240, 134217728, 0, 10256, 134217728, 0, 10688, 32776, 0, 10688, 32776, 0, 10704, 32776, 0, 10704, 32776, 0, 15616, 524288, 0, 19136, 8390656, 0, 19136, 8390656, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392402434419990_664_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392402434419990_664_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..abbbad08 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392402434419990_664_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,176 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 330 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 1363481681, 0, 2112, 1363481681, 0, 2112, 1363481681, 0, 2112, 1363481681, 0, 2112, 1363481681, 0, 2112, 1363481681, 0, 2112, 1363481681, 0, 2112, 1363481681, 0, 2112, 1363481681, 0, 2112, 1363481681, 0, 2112, 1363481681, 0, 3648, 2852126890, 0, 3648, 2852126890, 0, 3648, 2852126890, 0, 3648, 2852126890, 0, 3648, 2852126890, 0, 3648, 2852126890, 0, 3648, 2852126890, 0, 3648, 2852126890, 0, 3664, 2852126890, 0, 3664, 2852126890, 0, 3664, 2852126890, 0, 3664, 2852126890, 0, 3664, 2852126890, 0, 3664, 2852126890, 0, 3664, 2852126890, 0, 3664, 2852126890, 0, 4352, 2147483648, 0, 4368, 2147483648, 0, 6800, 1431633920, 0, 6800, 1431633920, 0, 6800, 1431633920, 0, 6800, 1431633920, 0, 6800, 1431633920, 0, 6800, 1431633920, 0, 6800, 1431633920, 0, 6800, 1431633920, 0, 6816, 1431633920, 0, 6816, 1431633920, 0, 6816, 1431633920, 0, 6816, 1431633920, 0, 6816, 1431633920, 0, 6816, 1431633920, 0, 6816, 1431633920, 0, 6816, 1431633920, 0, 7376, 341, 0, 7376, 341, 0, 7376, 341, 0, 7376, 341, 0, 7376, 341, 0, 7392, 341, 0, 7392, 341, 0, 7392, 341, 0, 7392, 341, 0, 7392, 341, 0, 2112, 1363481681, 0, 2112, 1363481681, 0, 2112, 1363481681, 0, 2112, 1363481681, 
0, 2112, 1363481681, 0, 2112, 1363481681, 0, 2112, 1363481681, 0, 2112, 1363481681, 0, 2112, 1363481681, 0, 2112, 1363481681, 0, 2112, 1363481681, 0, 3648, 2852126890, 0, 3648, 2852126890, 0, 3648, 2852126890, 0, 3648, 2852126890, 0, 3648, 2852126890, 0, 3648, 2852126890, 0, 3648, 2852126890, 0, 3648, 2852126890, 0, 3664, 2852126890, 0, 3664, 2852126890, 0, 3664, 2852126890, 0, 3664, 2852126890, 0, 3664, 2852126890, 0, 3664, 2852126890, 0, 3664, 2852126890, 0, 3664, 2852126890, 0, 4352, 2147483648, 0, 4368, 2147483648, 0, 6800, 1431633920, 0, 6800, 1431633920, 0, 6800, 1431633920, 0, 6800, 1431633920, 0, 6800, 1431633920, 0, 6800, 1431633920, 0, 6800, 1431633920, 0, 6800, 1431633920, 0, 6816, 1431633920, 0, 6816, 1431633920, 0, 6816, 1431633920, 0, 6816, 1431633920, 0, 6816, 1431633920, 0, 6816, 1431633920, 0, 6816, 1431633920, 0, 6816, 1431633920, 0, 7376, 341, 0, 7376, 341, 0, 7376, 341, 0, 7376, 341, 0, 7376, 341, 0, 7392, 341, 0, 7392, 341, 0, 7392, 341, 0, 7392, 341, 0, 7392, 341, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392412582665524_665_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392412582665524_665_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fed8f97c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392412582665524_665_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,228 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((68 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + 
if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27))) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 0))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 270 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2368, 16777216, 0, 2372, 16777216, 0, 2384, 16777216, 0, 2388, 16777216, 0, 3008, 1, 0, 3012, 1, 0, 3024, 1, 0, 3028, 1, 0, 3904, 1074004032, 0, 3904, 1074004032, 0, 3904, 1074004032, 0, 3908, 1074004032, 0, 3908, 1074004032, 0, 3908, 1074004032, 0, 3920, 1074004032, 0, 3920, 1074004032, 0, 3920, 1074004032, 0, 3924, 1074004032, 0, 3924, 1074004032, 0, 3924, 1074004032, 0, 4352, 32776, 0, 4352, 32776, 0, 4356, 32776, 0, 4356, 32776, 0, 4368, 32776, 0, 4368, 32776, 0, 4372, 32776, 0, 4372, 32776, 0, 5056, 32768, 0, 5060, 32768, 0, 5072, 32768, 0, 5076, 32768, 0, 5952, 1073741833, 0, 5952, 1073741833, 0, 5952, 1073741833, 0, 5968, 1073741833, 0, 5968, 1073741833, 0, 5968, 1073741833, 0, 6528, 272696336, 0, 6528, 272696336, 0, 6528, 272696336, 0, 6528, 272696336, 0, 6528, 272696336, 0, 10112, 1048576, 0, 10128, 1048576, 0, 2368, 16777216, 0, 2372, 16777216, 0, 2384, 16777216, 0, 2388, 16777216, 0, 3008, 1, 0, 3012, 1, 0, 3024, 1, 0, 3028, 1, 0, 3904, 1074004032, 0, 3904, 1074004032, 0, 3904, 1074004032, 0, 3908, 1074004032, 0, 3908, 1074004032, 0, 3908, 1074004032, 0, 3920, 1074004032, 0, 3920, 1074004032, 0, 3920, 1074004032, 0, 3924, 1074004032, 0, 3924, 1074004032, 0, 3924, 1074004032, 0, 4352, 32776, 0, 4352, 32776, 0, 4356, 32776, 0, 4356, 32776, 0, 4368, 32776, 0, 4368, 32776, 0, 4372, 32776, 0, 4372, 32776, 0, 5056, 32768, 0, 5060, 32768, 0, 5072, 32768, 0, 5076, 32768, 0, 5952, 1073741833, 0, 5952, 1073741833, 0, 5952, 1073741833, 0, 5968, 1073741833, 0, 5968, 1073741833, 0, 5968, 1073741833, 0, 6528, 272696336, 0, 6528, 
272696336, 0, 6528, 272696336, 0, 6528, 272696336, 0, 6528, 272696336, 0, 10112, 1048576, 0, 10128, 1048576, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392446430700045_667_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392446430700045_667_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cfd1328c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392446430700045_667_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,352 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 28))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 22)) || 
(WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 10))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24))) 
{ + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() 
== 16)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 15))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((311 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((334 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 27))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((345 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (368 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (377 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (382 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4864, 570425378, 0, 4864, 570425378, 0, 4864, 570425378, 0, 4864, 570425378, 0, 6400, 537002016, 0, 6400, 537002016, 0, 6400, 537002016, 0, 7360, 570425378, 0, 7360, 570425378, 0, 7360, 570425378, 0, 7360, 570425378, 0, 14016, 67125252, 0, 14016, 67125252, 0, 14016, 67125252, 0, 14464, 559240, 0, 
14464, 559240, 0, 14464, 559240, 0, 14464, 559240, 0, 14464, 559240, 0, 24128, 272696336, 0, 24128, 272696336, 0, 24128, 272696336, 0, 24128, 272696336, 0, 24128, 272696336, 0, 24448, 613566756, 0, 24448, 613566756, 0, 24448, 613566756, 0, 24448, 613566756, 0, 24448, 613566756, 0, 24448, 613566756, 0, 24448, 613566756, 0, 24448, 613566756, 0, 24448, 613566756, 0, 24448, 613566756, 0, 576, 17, 0, 576, 17, 0, 4864, 570425378, 0, 4864, 570425378, 0, 4864, 570425378, 0, 4864, 570425378, 0, 6400, 537002016, 0, 6400, 537002016, 0, 6400, 537002016, 0, 7360, 570425378, 0, 7360, 570425378, 0, 7360, 570425378, 0, 7360, 570425378, 0, 14016, 67125252, 0, 14016, 67125252, 0, 14016, 67125252, 0, 14464, 559240, 0, 14464, 559240, 0, 14464, 559240, 0, 14464, 559240, 0, 14464, 559240, 0, 24128, 272696336, 0, 24128, 272696336, 0, 24128, 272696336, 0, 24128, 272696336, 0, 24128, 272696336, 0, 24448, 613566756, 0, 24448, 613566756, 0, 24448, 613566756, 0, 24448, 613566756, 0, 24448, 613566756, 0, 24448, 613566756, 0, 24448, 613566756, 0, 24448, 613566756, 0, 24448, 613566756, 0, 24448, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392451846054723_668_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392451846054723_668_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f6a89972 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392451846054723_668_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,117 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* each lane appends 3-word records: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the atomic append cursor into _participant_bit */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 28)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 23))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 312 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 3758096639, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 
5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1152, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 1168, 3758096639, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5696, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0, 5712, 4160750591, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index 
+ Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392636444164835_670_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392636444164835_670_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0e2f8d32 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392636444164835_670_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,204 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* each lane appends 3-word records: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the atomic append cursor into _participant_bit */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 
0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 27))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } else { + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 18))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + 
} + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1664, 1048832, 0, 1664, 1048832, 0, 3392, 1073741828, 0, 3392, 1073741828, 0, 4992, 67108864, 0, 7808, 8, 0, 9600, 8390656, 0, 9600, 8390656, 0, 768, 1, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1664, 1048832, 0, 1664, 1048832, 0, 3392, 1073741828, 0, 3392, 1073741828, 0, 4992, 67108864, 0, 7808, 8, 0, 9600, 8390656, 0, 9600, 8390656, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392649584030857_672_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392649584030857_672_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..60738462 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392649584030857_672_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,222 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* each lane appends 3-word records: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the atomic append cursor into _participant_bit */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 25)) { + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7296, 128, 0, 8512, 272696336, 0, 8512, 272696336, 0, 8512, 272696336, 0, 8512, 272696336, 0, 8512, 272696336, 0, 10560, 268435456, 0, 10880, 613566756, 0, 10880, 613566756, 0, 10880, 613566756, 0, 10880, 613566756, 0, 10880, 613566756, 0, 10880, 613566756, 0, 10880, 613566756, 0, 10880, 613566756, 0, 10880, 613566756, 0, 10880, 613566756, 0, 7296, 128, 0, 8512, 272696336, 0, 8512, 272696336, 0, 8512, 272696336, 0, 8512, 272696336, 0, 8512, 272696336, 0, 10560, 268435456, 0, 10880, 613566756, 0, 10880, 613566756, 0, 10880, 613566756, 0, 10880, 613566756, 0, 10880, 613566756, 0, 10880, 613566756, 0, 10880, 613566756, 0, 10880, 613566756, 0, 10880, 613566756, 0, 10880, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392695872460189_674_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392695872460189_674_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8c13ea89 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392695872460189_674_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,179 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* each lane appends 3-word records: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the atomic append cursor into _participant_bit */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 23)) { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 17))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { /* NOTE(review): case 0 above has no break and falls through into this case - confirm this is intended */ + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 18))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 285212672, 0, 896, 285212672, 0, 8576, 1145324612, 0, 8576, 1145324612, 0, 8576, 1145324612, 0, 8576, 1145324612, 0, 8576, 1145324612, 0, 8576, 1145324612, 0, 8576, 1145324612, 0, 8576, 1145324612, 0, 9024, 559240, 0, 9024, 559240, 0, 9024, 559240, 0, 9024, 559240, 0, 9024, 559240, 0, 896, 285212672, 0, 896, 285212672, 0, 8576, 1145324612, 0, 8576, 1145324612, 0, 8576, 1145324612, 0, 8576, 1145324612, 0, 8576, 1145324612, 0, 8576, 1145324612, 0, 8576, 1145324612, 0, 8576, 1145324612, 0, 9024, 559240, 0, 9024, 559240, 0, 9024, 559240, 0, 9024, 559240, 0, 9024, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392696171226856_675_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392696171226856_675_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1469833b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392696171226856_675_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,105 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* each lane appends 3-word records: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the atomic append cursor into _participant_bit */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 27)) { + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 9)) { + result = 
(result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 42, 0, 1216, 42, 0, 1216, 42, 0, 2304, 42, 0, 2304, 42, 0, 2304, 42, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 42, 0, 1216, 42, 0, 1216, 42, 0, 2304, 42, 0, 2304, 42, 0, 2304, 42, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392696402207157_676_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392696402207157_676_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e722710c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392696402207157_676_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,92 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 31)) { + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((24 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- 
+Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 432 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1552, 2147483648, 0, 1556, 2147483648, 0, 1568, 2147483648, 0, 1572, 2147483648, 0, 1584, 2147483648, 0, 1588, 2147483648, 0, 2000, 2147483648, 0, 2004, 2147483648, 0, 2016, 2147483648, 0, 2020, 2147483648, 0, 2032, 2147483648, 0, 2036, 2147483648, 0, 2448, 4290772992, 0, 2448, 4290772992, 0, 2448, 4290772992, 0, 2448, 4290772992, 0, 2448, 4290772992, 0, 2448, 4290772992, 0, 2448, 4290772992, 0, 2448, 4290772992, 0, 2448, 4290772992, 0, 2448, 4290772992, 0, 2452, 4290772992, 0, 2452, 4290772992, 0, 2452, 4290772992, 0, 2452, 4290772992, 0, 2452, 4290772992, 0, 2452, 4290772992, 0, 2452, 4290772992, 0, 2452, 4290772992, 0, 2452, 4290772992, 0, 2452, 4290772992, 0, 2464, 4290772992, 0, 2464, 4290772992, 0, 2464, 4290772992, 0, 2464, 4290772992, 0, 2464, 4290772992, 0, 2464, 4290772992, 0, 2464, 4290772992, 0, 2464, 4290772992, 0, 2464, 4290772992, 0, 2464, 4290772992, 0, 2468, 4290772992, 0, 2468, 4290772992, 0, 2468, 4290772992, 0, 2468, 4290772992, 0, 2468, 4290772992, 0, 2468, 4290772992, 0, 2468, 4290772992, 0, 2468, 4290772992, 0, 2468, 4290772992, 0, 2468, 4290772992, 0, 2480, 4290772992, 0, 2480, 4290772992, 0, 2480, 4290772992, 0, 2480, 4290772992, 0, 2480, 4290772992, 0, 2480, 4290772992, 0, 2480, 4290772992, 0, 2480, 4290772992, 0, 2480, 4290772992, 0, 2480, 4290772992, 0, 2484, 4290772992, 0, 2484, 4290772992, 0, 2484, 4290772992, 0, 2484, 4290772992, 0, 2484, 4290772992, 0, 2484, 4290772992, 0, 2484, 4290772992, 0, 2484, 4290772992, 0, 2484, 4290772992, 0, 2484, 4290772992, 0, 1552, 2147483648, 0, 1556, 2147483648, 0, 1568, 2147483648, 0, 1572, 2147483648, 0, 1584, 2147483648, 0, 1588, 2147483648, 0, 2000, 2147483648, 0, 2004, 2147483648, 0, 2016, 2147483648, 0, 2020, 2147483648, 0, 2032, 2147483648, 
0, 2036, 2147483648, 0, 2448, 4290772992, 0, 2448, 4290772992, 0, 2448, 4290772992, 0, 2448, 4290772992, 0, 2448, 4290772992, 0, 2448, 4290772992, 0, 2448, 4290772992, 0, 2448, 4290772992, 0, 2448, 4290772992, 0, 2448, 4290772992, 0, 2452, 4290772992, 0, 2452, 4290772992, 0, 2452, 4290772992, 0, 2452, 4290772992, 0, 2452, 4290772992, 0, 2452, 4290772992, 0, 2452, 4290772992, 0, 2452, 4290772992, 0, 2452, 4290772992, 0, 2452, 4290772992, 0, 2464, 4290772992, 0, 2464, 4290772992, 0, 2464, 4290772992, 0, 2464, 4290772992, 0, 2464, 4290772992, 0, 2464, 4290772992, 0, 2464, 4290772992, 0, 2464, 4290772992, 0, 2464, 4290772992, 0, 2464, 4290772992, 0, 2468, 4290772992, 0, 2468, 4290772992, 0, 2468, 4290772992, 0, 2468, 4290772992, 0, 2468, 4290772992, 0, 2468, 4290772992, 0, 2468, 4290772992, 0, 2468, 4290772992, 0, 2468, 4290772992, 0, 2468, 4290772992, 0, 2480, 4290772992, 0, 2480, 4290772992, 0, 2480, 4290772992, 0, 2480, 4290772992, 0, 2480, 4290772992, 0, 2480, 4290772992, 0, 2480, 4290772992, 0, 2480, 4290772992, 0, 2480, 4290772992, 0, 2480, 4290772992, 0, 2484, 4290772992, 0, 2484, 4290772992, 0, 2484, 4290772992, 0, 2484, 4290772992, 0, 2484, 4290772992, 0, 2484, 4290772992, 0, 2484, 4290772992, 0, 2484, 4290772992, 0, 2484, 4290772992, 0, 2484, 4290772992, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392746013771034_678_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392746013771034_678_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1f56cff8 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392746013771034_678_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,291 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 20)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 19)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((101 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((112 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 24)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 14)) { + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 17)) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((216 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 11))) { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((252 << 6) | (counter4 << 4)) | (i5 << 2)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((267 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i5 == 1)) { + continue; + } + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1134 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4224, 1, 0, 5184, 32, 0, 5200, 32, 0, 5216, 32, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 
4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7616, 1024, 0, 7632, 1024, 0, 7648, 1024, 0, 11600, 65600, 0, 11600, 65600, 0, 11616, 65600, 0, 11616, 65600, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 
2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 13840, 2852126890, 0, 13840, 2852126890, 0, 13840, 2852126890, 0, 13840, 2852126890, 0, 13840, 2852126890, 0, 13840, 2852126890, 0, 13840, 2852126890, 0, 13840, 2852126890, 0, 13844, 2852126890, 0, 13844, 2852126890, 0, 13844, 2852126890, 0, 13844, 2852126890, 0, 13844, 2852126890, 0, 13844, 2852126890, 0, 13844, 2852126890, 0, 13844, 2852126890, 0, 13856, 2852126890, 0, 13856, 2852126890, 0, 13856, 2852126890, 0, 13856, 2852126890, 0, 13856, 2852126890, 0, 13856, 2852126890, 0, 13856, 2852126890, 0, 13856, 2852126890, 0, 13860, 2852126890, 0, 13860, 2852126890, 0, 13860, 2852126890, 0, 13860, 2852126890, 0, 13860, 2852126890, 0, 13860, 2852126890, 0, 13860, 2852126890, 0, 13860, 2852126890, 0, 18384, 33558530, 0, 18384, 33558530, 0, 18384, 33558530, 0, 18400, 33558530, 0, 18400, 33558530, 0, 18400, 33558530, 0, 4224, 1, 0, 5184, 32, 0, 5200, 32, 0, 5216, 32, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7168, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 
0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7172, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7184, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7188, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7200, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7204, 4286579199, 0, 7616, 1024, 0, 7632, 1024, 0, 7648, 1024, 0, 11600, 65600, 0, 11600, 65600, 0, 11616, 65600, 0, 11616, 65600, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 12496, 2863311530, 0, 
12496, 2863311530, 0, 12496, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 12512, 2863311530, 0, 13840, 2852126890, 0, 13840, 2852126890, 0, 13840, 2852126890, 0, 13840, 2852126890, 0, 13840, 2852126890, 0, 13840, 2852126890, 0, 13840, 2852126890, 0, 13840, 2852126890, 0, 13844, 2852126890, 0, 13844, 2852126890, 0, 13844, 2852126890, 0, 13844, 2852126890, 0, 13844, 2852126890, 0, 13844, 2852126890, 0, 13844, 2852126890, 0, 13844, 2852126890, 0, 13856, 2852126890, 0, 13856, 2852126890, 0, 13856, 2852126890, 0, 13856, 2852126890, 0, 13856, 2852126890, 0, 13856, 2852126890, 0, 13856, 2852126890, 0, 13856, 2852126890, 0, 13860, 2852126890, 0, 13860, 2852126890, 0, 13860, 2852126890, 0, 13860, 2852126890, 0, 13860, 2852126890, 0, 13860, 2852126890, 0, 13860, 2852126890, 0, 13860, 2852126890, 0, 18384, 33558530, 0, 18384, 33558530, 0, 18384, 33558530, 0, 18400, 33558530, 0, 18400, 33558530, 0, 18400, 33558530, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392849149231305_679_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392849149231305_679_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..12bc872b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392849149231305_679_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,112 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
else { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 300 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 65536, 0, 1040, 65536, 0, 1984, 32, 0, 1988, 32, 0, 1992, 32, 0, 2000, 32, 0, 2004, 32, 0, 2008, 32, 0, 3264, 3758096387, 0, 3264, 3758096387, 0, 3264, 3758096387, 0, 3264, 3758096387, 0, 3264, 3758096387, 0, 3268, 3758096387, 0, 3268, 3758096387, 0, 3268, 3758096387, 0, 3268, 3758096387, 0, 3268, 3758096387, 0, 3272, 3758096387, 0, 3272, 3758096387, 0, 3272, 3758096387, 0, 3272, 3758096387, 0, 3272, 3758096387, 0, 3280, 3758096387, 0, 3280, 3758096387, 0, 3280, 3758096387, 0, 3280, 3758096387, 0, 3280, 3758096387, 0, 3284, 3758096387, 0, 3284, 3758096387, 0, 3284, 3758096387, 0, 3284, 3758096387, 0, 3284, 3758096387, 0, 3288, 3758096387, 0, 3288, 3758096387, 0, 3288, 3758096387, 0, 3288, 3758096387, 0, 3288, 3758096387, 0, 3712, 16, 0, 3716, 16, 0, 3720, 16, 0, 3728, 16, 0, 3732, 16, 0, 3736, 16, 0, 4160, 33554432, 0, 4164, 33554432, 0, 4168, 33554432, 0, 4176, 33554432, 0, 4180, 33554432, 0, 4184, 33554432, 0, 1024, 65536, 0, 1040, 65536, 0, 1984, 32, 0, 1988, 32, 
0, 1992, 32, 0, 2000, 32, 0, 2004, 32, 0, 2008, 32, 0, 3264, 3758096387, 0, 3264, 3758096387, 0, 3264, 3758096387, 0, 3264, 3758096387, 0, 3264, 3758096387, 0, 3268, 3758096387, 0, 3268, 3758096387, 0, 3268, 3758096387, 0, 3268, 3758096387, 0, 3268, 3758096387, 0, 3272, 3758096387, 0, 3272, 3758096387, 0, 3272, 3758096387, 0, 3272, 3758096387, 0, 3272, 3758096387, 0, 3280, 3758096387, 0, 3280, 3758096387, 0, 3280, 3758096387, 0, 3280, 3758096387, 0, 3280, 3758096387, 0, 3284, 3758096387, 0, 3284, 3758096387, 0, 3284, 3758096387, 0, 3284, 3758096387, 0, 3284, 3758096387, 0, 3288, 3758096387, 0, 3288, 3758096387, 0, 3288, 3758096387, 0, 3288, 3758096387, 0, 3288, 3758096387, 0, 3712, 16, 0, 3716, 16, 0, 3720, 16, 0, 3728, 16, 0, 3732, 16, 0, 3736, 16, 0, 4160, 33554432, 0, 4164, 33554432, 0, 4168, 33554432, 0, 4176, 33554432, 0, 4180, 33554432, 0, 4184, 33554432, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392868663853352_680_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392868663853352_680_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..90954bf5 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392868663853352_680_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,158 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 73, 0, 768, 73, 0, 768, 73, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 
613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 768, 73, 0, 768, 73, 0, 768, 73, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0, 5056, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392868958661419_681_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392868958661419_681_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..61f70ed8 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392868958661419_681_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,104 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 174 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 
3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0, 1472, 3067833782, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392869273199725_682_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392869273199725_682_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f46063fd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392869273199725_682_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,205 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 3)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1808, 268439552, 0, 1808, 268439552, 0, 1824, 268439552, 0, 1824, 268439552, 0, 3136, 134217728, 0, 4288, 134217728, 0, 5120, 134217728, 0, 1808, 268439552, 0, 1808, 268439552, 0, 1824, 268439552, 0, 1824, 268439552, 0, 3136, 134217728, 0, 4288, 134217728, 0, 5120, 134217728, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392870337968492_683_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392870337968492_683_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b2b9ad50 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392870337968492_683_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2368, 67174916, 0, 2368, 67174916, 0, 2368, 67174916, 0, 2368, 67174916, 0, 1984, 262145, 0, 1984, 262145, 0, 2368, 
67174916, 0, 2368, 67174916, 0, 2368, 67174916, 0, 2368, 67174916, 0, 1984, 262145, 0, 1984, 262145, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392912765381784_686_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392912765381784_686_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3f884588 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392912765381784_686_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,108 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 3264, 613566756, 0, 3264, 613566756, 0, 3264, 613566756, 0, 3264, 613566756, 0, 3264, 613566756, 0, 3264, 613566756, 0, 3264, 613566756, 0, 3264, 613566756, 0, 3264, 613566756, 0, 3264, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 3264, 613566756, 0, 3264, 613566756, 0, 3264, 613566756, 0, 3264, 613566756, 0, 3264, 613566756, 0, 3264, 613566756, 0, 3264, 613566756, 0, 3264, 613566756, 0, 3264, 613566756, 0, 3264, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - 
Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756392913178341369_687_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756392913178341369_687_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d2cffe0c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756392913178341369_687_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 468 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 63, 0, 896, 63, 0, 896, 63, 0, 896, 63, 0, 896, 63, 0, 896, 63, 0, 912, 63, 0, 912, 63, 0, 912, 63, 0, 912, 63, 0, 912, 63, 0, 912, 63, 0, 928, 63, 0, 928, 63, 0, 928, 63, 0, 928, 63, 0, 928, 63, 0, 928, 63, 0, 1988, 85, 0, 1988, 85, 0, 1988, 85, 0, 1988, 85, 0, 1992, 85, 0, 1992, 85, 0, 1992, 85, 0, 1992, 85, 0, 1996, 85, 0, 1996, 85, 0, 1996, 85, 0, 1996, 85, 0, 2004, 85, 0, 2004, 85, 0, 2004, 85, 0, 2004, 85, 0, 2008, 85, 0, 2008, 85, 0, 2008, 85, 0, 2008, 85, 0, 2012, 85, 0, 2012, 85, 0, 2012, 85, 0, 2012, 85, 0, 2020, 85, 0, 2020, 85, 0, 2020, 85, 0, 2020, 85, 0, 2024, 85, 0, 2024, 85, 0, 2024, 85, 0, 2024, 85, 0, 2028, 85, 0, 2028, 85, 0, 2028, 85, 0, 2028, 85, 0, 3008, 4278190080, 0, 3008, 4278190080, 0, 3008, 4278190080, 0, 3008, 4278190080, 0, 3008, 4278190080, 0, 3008, 4278190080, 0, 3008, 4278190080, 0, 3008, 4278190080, 0, 3024, 4278190080, 0, 3024, 4278190080, 0, 3024, 4278190080, 0, 3024, 4278190080, 0, 3024, 4278190080, 0, 3024, 4278190080, 0, 3024, 4278190080, 0, 3024, 4278190080, 0, 3040, 4278190080, 0, 3040, 4278190080, 0, 3040, 4278190080, 0, 3040, 4278190080, 0, 3040, 
4278190080, 0, 3040, 4278190080, 0, 3040, 4278190080, 0, 3040, 4278190080, 0, 896, 63, 0, 896, 63, 0, 896, 63, 0, 896, 63, 0, 896, 63, 0, 896, 63, 0, 912, 63, 0, 912, 63, 0, 912, 63, 0, 912, 63, 0, 912, 63, 0, 912, 63, 0, 928, 63, 0, 928, 63, 0, 928, 63, 0, 928, 63, 0, 928, 63, 0, 928, 63, 0, 1988, 85, 0, 1988, 85, 0, 1988, 85, 0, 1988, 85, 0, 1992, 85, 0, 1992, 85, 0, 1992, 85, 0, 1992, 85, 0, 1996, 85, 0, 1996, 85, 0, 1996, 85, 0, 1996, 85, 0, 2004, 85, 0, 2004, 85, 0, 2004, 85, 0, 2004, 85, 0, 2008, 85, 0, 2008, 85, 0, 2008, 85, 0, 2008, 85, 0, 2012, 85, 0, 2012, 85, 0, 2012, 85, 0, 2012, 85, 0, 2020, 85, 0, 2020, 85, 0, 2020, 85, 0, 2020, 85, 0, 2024, 85, 0, 2024, 85, 0, 2024, 85, 0, 2024, 85, 0, 2028, 85, 0, 2028, 85, 0, 2028, 85, 0, 2028, 85, 0, 3008, 4278190080, 0, 3008, 4278190080, 0, 3008, 4278190080, 0, 3008, 4278190080, 0, 3008, 4278190080, 0, 3008, 4278190080, 0, 3008, 4278190080, 0, 3008, 4278190080, 0, 3024, 4278190080, 0, 3024, 4278190080, 0, 3024, 4278190080, 0, 3024, 4278190080, 0, 3024, 4278190080, 0, 3024, 4278190080, 0, 3024, 4278190080, 0, 3024, 4278190080, 0, 3040, 4278190080, 0, 3040, 4278190080, 0, 3040, 4278190080, 0, 3040, 4278190080, 0, 3040, 4278190080, 0, 3040, 4278190080, 0, 3040, 4278190080, 0, 3040, 4278190080, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756393380473743954_690_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756393380473743954_690_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8762c9f4 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756393380473743954_690_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,232 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if 
(((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 21))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 4864, 17826816, 0, 4864, 17826816, 0, 4864, 17826816, 0, 4608, 67108864, 0, 4224, 1073742165, 0, 4224, 1073742165, 0, 4224, 1073742165, 0, 4224, 1073742165, 0, 4224, 1073742165, 0, 4224, 1073742165, 0, 11328, 73, 0, 11328, 73, 0, 11328, 73, 0, 14784, 613566756, 0, 14784, 613566756, 0, 14784, 613566756, 0, 14784, 613566756, 0, 14784, 613566756, 0, 14784, 613566756, 0, 14784, 613566756, 0, 14784, 613566756, 0, 14784, 613566756, 0, 14784, 613566756, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 
2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 5120, 2863311530, 0, 4864, 17826816, 0, 4864, 17826816, 0, 4864, 17826816, 0, 4608, 67108864, 0, 4224, 1073742165, 0, 4224, 1073742165, 0, 4224, 1073742165, 0, 4224, 1073742165, 0, 4224, 1073742165, 0, 4224, 1073742165, 0, 11328, 73, 0, 11328, 73, 0, 11328, 73, 0, 14784, 613566756, 0, 14784, 613566756, 0, 14784, 613566756, 0, 14784, 613566756, 0, 14784, 613566756, 0, 14784, 613566756, 0, 14784, 613566756, 0, 14784, 613566756, 0, 14784, 613566756, 0, 14784, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756393381707547113_691_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756393381707547113_691_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c9dd2abd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756393381707547113_691_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,395 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 28))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 9)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 20))) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 22)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 19)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((266 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + 
uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((293 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 31))) { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 16)) { + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((325 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((332 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (341 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (365 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((380 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (387 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (394 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (398 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads 
+Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2816, 2, 0, 14672, 570425344, 0, 14672, 570425344, 0, 14688, 570425344, 0, 14688, 570425344, 0, 14704, 570425344, 0, 14704, 570425344, 0, 17040, 536870912, 0, 17056, 536870912, 0, 17072, 536870912, 0, 18768, 1140850688, 0, 18768, 1140850688, 0, 18784, 1140850688, 0, 18784, 1140850688, 0, 18800, 1140850688, 0, 18800, 1140850688, 0, 24768, 34824, 0, 24768, 34824, 0, 24768, 34824, 0, 25216, 134217728, 0, 2816, 2, 0, 14672, 570425344, 0, 14672, 570425344, 0, 14688, 570425344, 0, 14688, 570425344, 0, 14704, 570425344, 0, 14704, 570425344, 0, 17040, 536870912, 0, 17056, 536870912, 0, 17072, 536870912, 0, 18768, 1140850688, 0, 18768, 1140850688, 0, 18784, 1140850688, 0, 18784, 1140850688, 0, 18800, 1140850688, 0, 18800, 1140850688, 0, 24768, 34824, 0, 24768, 34824, 0, 24768, 34824, 0, 25216, 134217728, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756397195741608896_695_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756397195741608896_695_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e94b84e4 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756397195741608896_695_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,289 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23))) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((143 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((152 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 25))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((231 << 6) | (counter5 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 1)) { + break; + } + } + break; + } + } + } else { + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 30)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 25))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { 
+ result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [9168, 2097152, 0, 9172, 2097152, 0, 9176, 2097152, 0, 9184, 2097152, 0, 9188, 2097152, 0, 9192, 2097152, 0, 9744, 1638, 0, 9744, 1638, 0, 9744, 1638, 0, 9744, 1638, 0, 9744, 1638, 0, 9744, 1638, 0, 9748, 1638, 0, 9748, 1638, 0, 9748, 1638, 0, 9748, 1638, 0, 9748, 1638, 0, 9748, 1638, 0, 9752, 1638, 0, 9752, 1638, 0, 9752, 1638, 0, 9752, 1638, 0, 9752, 1638, 0, 9752, 1638, 0, 9760, 1638, 0, 9760, 1638, 0, 9760, 1638, 0, 9760, 1638, 0, 9760, 1638, 0, 9760, 1638, 0, 9764, 1638, 0, 9764, 1638, 0, 9764, 1638, 0, 9764, 1638, 0, 9764, 1638, 0, 9764, 1638, 0, 9768, 1638, 0, 9768, 1638, 0, 9768, 1638, 0, 9768, 1638, 0, 9768, 1638, 0, 9768, 1638, 0, 15872, 2147483648, 0, 9168, 2097152, 0, 9172, 2097152, 0, 9176, 2097152, 0, 9184, 2097152, 0, 9188, 2097152, 0, 9192, 2097152, 0, 9744, 1638, 0, 9744, 1638, 0, 9744, 1638, 0, 9744, 1638, 0, 9744, 1638, 0, 9744, 1638, 0, 9748, 1638, 0, 9748, 1638, 0, 9748, 1638, 0, 9748, 1638, 0, 9748, 1638, 0, 9748, 1638, 0, 9752, 1638, 0, 9752, 1638, 0, 9752, 1638, 0, 9752, 1638, 0, 9752, 1638, 0, 9752, 1638, 0, 9760, 1638, 0, 9760, 1638, 0, 9760, 1638, 0, 9760, 1638, 0, 9760, 1638, 0, 9760, 1638, 0, 9764, 1638, 0, 9764, 1638, 0, 9764, 1638, 0, 9764, 1638, 0, 9764, 1638, 0, 9764, 1638, 0, 9768, 1638, 0, 9768, 1638, 0, 9768, 1638, 0, 9768, 1638, 0, 9768, 1638, 0, 9768, 1638, 0, 15872, 2147483648, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] 
+Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756399837494316551_700_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756399837494316551_700_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f197c7dc --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756399837494316551_700_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,133 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 30))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 18))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 
27)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2560, 131072, 0, 5632, 262400, 0, 5632, 262400, 0, 8464, 256, 0, 8480, 256, 0, 8496, 256, 0, 9408, 1073741824, 0, 2560, 131072, 0, 5632, 262400, 0, 5632, 262400, 0, 8464, 256, 0, 8480, 256, 0, 8496, 256, 0, 9408, 1073741824, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756399838776878952_701_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756399838776878952_701_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..350159c8 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756399838776878952_701_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,105 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 11)) { + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1664, 268701704, 0, 1664, 268701704, 0, 1664, 268701704, 0, 1664, 268701704, 0, 1680, 268701704, 0, 1680, 268701704, 0, 1680, 268701704, 0, 1680, 268701704, 0, 1696, 268701704, 0, 1696, 268701704, 0, 1696, 268701704, 0, 1696, 268701704, 0, 2624, 1, 0, 2628, 1, 0, 2632, 1, 0, 2640, 1, 0, 2644, 1, 0, 2648, 1, 0, 2656, 1, 0, 2660, 1, 0, 2664, 1, 0, 4096, 69632, 0, 4096, 69632, 0, 4112, 69632, 0, 4112, 69632, 0, 4128, 69632, 0, 4128, 69632, 0, 1664, 268701704, 0, 1664, 268701704, 0, 1664, 268701704, 0, 1664, 268701704, 0, 1680, 268701704, 0, 1680, 268701704, 0, 1680, 268701704, 0, 1680, 268701704, 0, 1696, 268701704, 0, 1696, 268701704, 0, 1696, 268701704, 0, 1696, 268701704, 0, 2624, 1, 0, 2628, 1, 0, 2632, 1, 0, 2640, 1, 0, 2644, 1, 0, 2648, 1, 0, 2656, 1, 0, 2660, 1, 0, 2664, 1, 0, 4096, 69632, 0, 4096, 69632, 0, 4112, 69632, 0, 4112, 69632, 0, 4128, 69632, 0, 4128, 69632, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + 
Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756399892358605794_704_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756399892358605794_704_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..30145f80 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756399892358605794_704_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,123 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result 
= (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 288 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 526336, 0, 1792, 526336, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1280, 21, 0, 1280, 21, 0, 1280, 21, 0, 2432, 73, 0, 2432, 73, 0, 2432, 73, 0, 3008, 272696336, 0, 3008, 272696336, 0, 3008, 272696336, 0, 3008, 272696336, 0, 3008, 272696336, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 
0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 1792, 526336, 0, 1792, 526336, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1536, 2862785194, 0, 1280, 21, 0, 1280, 21, 0, 1280, 21, 0, 2432, 73, 0, 2432, 73, 0, 2432, 73, 0, 3008, 272696336, 0, 3008, 272696336, 0, 3008, 272696336, 0, 3008, 272696336, 0, 3008, 272696336, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0, 3328, 3067833782, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756399893181403155_705_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756399893181403155_705_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..be0c9311 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756399893181403155_705_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,299 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 27))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + 
WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((137 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((148 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((220 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((231 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMin(5)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((256 << 6) | (i2 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((263 << 6) | (i2 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((274 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4288, 2818572290, 0, 4288, 2818572290, 0, 4288, 2818572290, 0, 4288, 2818572290, 0, 5504, 128, 0, 10640, 65536, 0, 13184, 2147483648, 0, 13200, 2147483648, 0, 13216, 2147483648, 0, 14784, 2147483648, 0, 14800, 2147483648, 0, 14816, 2147483648, 0, 17536, 33554464, 0, 17536, 33554464, 0, 17552, 33554464, 0, 17552, 33554464, 0, 17568, 33554464, 0, 17568, 33554464, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4288, 2818572290, 0, 4288, 2818572290, 0, 4288, 2818572290, 0, 4288, 2818572290, 0, 5504, 128, 0, 10640, 65536, 0, 13184, 2147483648, 0, 13200, 2147483648, 0, 13216, 2147483648, 0, 14784, 2147483648, 0, 14800, 2147483648, 0, 14816, 2147483648, 0, 17536, 33554464, 0, 
17536, 33554464, 0, 17552, 33554464, 0, 17552, 33554464, 0, 17568, 33554464, 0, 17568, 33554464, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756399911146509944_706_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756399911146509944_706_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..64c3f9df --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756399911146509944_706_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,136 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 28))) { + if ((WaveGetLaneIndex() == 18)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 26)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + 
Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4416, 4026531871, 0, 4416, 4026531871, 0, 4416, 4026531871, 0, 4416, 4026531871, 0, 4416, 4026531871, 0, 4416, 4026531871, 0, 4416, 4026531871, 0, 4416, 4026531871, 0, 4416, 4026531871, 0, 4416, 4026531871, 0, 4416, 4026531871, 0, 4416, 4026531871, 0, 4416, 4026531871, 0, 4416, 4026531871, 0, 4416, 4026531871, 0, 4416, 4026531871, 0, 4416, 4026531871, 0, 4416, 4026531871, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756399929860793033_707_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756399929860793033_707_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..afdf048c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756399929860793033_707_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,92 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 31))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 18)) || 
(WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 29))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 256, 0, 5184, 524288, 0, 2112, 256, 0, 5184, 524288, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: 
_wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756400198593055625_709_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756400198593055625_709_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7d042c8b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756400198593055625_709_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,403 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (i0 << 4)) | (i1 << 2)); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 30))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 26))) { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((332 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((355 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (364 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((391 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((408 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 30))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (442 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((461 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (478 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (482 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 420 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 4096, 0, 1104, 4096, 0, 2304, 512, 0, 2308, 512, 0, 2312, 512, 0, 2320, 512, 0, 2324, 512, 0, 2328, 512, 0, 9856, 33554432, 0, 10176, 613566756, 0, 10176, 613566756, 0, 10176, 613566756, 0, 10176, 613566756, 0, 10176, 613566756, 0, 10176, 613566756, 0, 10176, 613566756, 0, 10176, 613566756, 0, 10176, 613566756, 0, 10176, 613566756, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 
0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 12928, 1, 0, 13568, 1, 0, 15168, 1, 0, 1088, 4096, 0, 1104, 4096, 0, 2304, 512, 0, 2308, 512, 0, 2312, 512, 0, 2320, 512, 0, 2324, 512, 0, 2328, 512, 0, 9856, 33554432, 0, 10176, 613566756, 0, 10176, 613566756, 0, 10176, 613566756, 0, 10176, 613566756, 0, 10176, 613566756, 0, 10176, 613566756, 0, 10176, 613566756, 0, 10176, 613566756, 0, 10176, 613566756, 0, 10176, 613566756, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11264, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11280, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 11296, 1431655765, 0, 12928, 1, 0, 13568, 1, 0, 15168, 1, 0] + - Name: _wave_op_index + Format: 
UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756400239410132311_710_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756400239410132311_710_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f6d39979 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756400239410132311_710_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,100 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2432, 525312, 0, 2432, 525312, 0, 2448, 525312, 0, 2448, 525312, 0, 2464, 525312, 0, 2464, 525312, 0, 2944, 613566756, 0, 2944, 613566756, 0, 2944, 613566756, 0, 2944, 613566756, 0, 2944, 613566756, 0, 2944, 613566756, 0, 2944, 613566756, 0, 2944, 613566756, 0, 2944, 613566756, 0, 2944, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 2432, 525312, 0, 2432, 525312, 0, 2448, 525312, 0, 2448, 525312, 0, 2464, 525312, 0, 2464, 525312, 0, 2944, 613566756, 0, 2944, 613566756, 0, 2944, 613566756, 0, 2944, 613566756, 0, 2944, 613566756, 0, 2944, 613566756, 0, 2944, 613566756, 0, 2944, 613566756, 0, 2944, 613566756, 0, 2944, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756400240164315275_711_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756400240164315275_711_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2c8d16a8 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756400240164315275_711_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,121 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((24 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((34 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + 
WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1206 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1552, 
2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 
2863311530, 0, 2176, 85, 0, 2176, 85, 0, 2176, 85, 0, 2176, 85, 0, 2180, 85, 0, 2180, 85, 0, 2180, 85, 0, 2180, 85, 0, 2184, 85, 0, 2184, 85, 0, 2184, 85, 0, 2184, 85, 0, 2192, 85, 0, 2192, 85, 0, 2192, 85, 0, 2192, 85, 0, 2196, 85, 0, 2196, 85, 0, 2196, 85, 0, 2196, 85, 0, 2200, 85, 0, 2200, 85, 0, 2200, 85, 0, 2200, 85, 0, 2208, 85, 0, 2208, 85, 0, 2208, 85, 0, 2208, 85, 0, 2212, 85, 0, 2212, 85, 0, 2212, 85, 0, 2212, 85, 0, 2216, 85, 0, 2216, 85, 0, 2216, 85, 0, 2216, 85, 0, 3648, 127, 0, 3648, 127, 0, 3648, 127, 0, 3648, 127, 0, 3648, 127, 0, 3648, 127, 0, 3648, 127, 0, 3664, 127, 0, 3664, 127, 0, 3664, 127, 0, 3664, 127, 0, 3664, 127, 0, 3664, 127, 0, 3664, 127, 0, 3680, 127, 0, 3680, 127, 0, 3680, 127, 0, 3680, 127, 0, 3680, 127, 0, 3680, 127, 0, 3680, 127, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1536, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1540, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1544, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 
1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1552, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1556, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1560, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1568, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1572, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 1576, 2863311530, 0, 2176, 85, 0, 2176, 85, 0, 2176, 85, 0, 2176, 85, 0, 2180, 85, 0, 2180, 85, 0, 2180, 85, 0, 2180, 85, 0, 2184, 85, 0, 2184, 85, 0, 2184, 85, 0, 2184, 85, 0, 2192, 85, 0, 2192, 85, 0, 2192, 85, 
0, 2192, 85, 0, 2196, 85, 0, 2196, 85, 0, 2196, 85, 0, 2196, 85, 0, 2200, 85, 0, 2200, 85, 0, 2200, 85, 0, 2200, 85, 0, 2208, 85, 0, 2208, 85, 0, 2208, 85, 0, 2208, 85, 0, 2212, 85, 0, 2212, 85, 0, 2212, 85, 0, 2212, 85, 0, 2216, 85, 0, 2216, 85, 0, 2216, 85, 0, 2216, 85, 0, 3648, 127, 0, 3648, 127, 0, 3648, 127, 0, 3648, 127, 0, 3648, 127, 0, 3648, 127, 0, 3648, 127, 0, 3664, 127, 0, 3664, 127, 0, 3664, 127, 0, 3664, 127, 0, 3664, 127, 0, 3664, 127, 0, 3664, 127, 0, 3680, 127, 0, 3680, 127, 0, 3680, 127, 0, 3680, 127, 0, 3680, 127, 0, 3680, 127, 0, 3680, 127, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756400386726658049_712_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756400386726658049_712_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b6e6b73c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756400386726658049_712_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,379 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26))) { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) 
{ + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if 
(((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((277 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((294 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 31))) { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (326 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((351 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((368 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((388 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((399 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((418 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (433 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 378 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 9, 0, 1280, 9, 0, 1296, 9, 0, 1296, 9, 0, 10384, 2148007952, 0, 10384, 2148007952, 0, 10384, 2148007952, 0, 10400, 2148007952, 0, 10400, 2148007952, 0, 10400, 2148007952, 0, 10416, 2148007952, 0, 10416, 2148007952, 0, 10416, 2148007952, 0, 11472, 4194304, 0, 11488, 4194304, 0, 11504, 4194304, 0, 13056, 536870912, 0, 14592, 612368384, 0, 14592, 612368384, 0, 14592, 612368384, 0, 15552, 536870912, 0, 18816, 256, 0, 18832, 256, 0, 23552, 4160, 0, 23552, 4160, 0, 23556, 4160, 0, 23556, 4160, 0, 23560, 4160, 0, 23560, 4160, 0, 23568, 4160, 0, 23568, 4160, 0, 23572, 4160, 0, 23572, 4160, 0, 23576, 4160, 0, 23576, 4160, 0, 23584, 4160, 0, 23584, 4160, 0, 23588, 4160, 0, 23588, 4160, 0, 23592, 4160, 0, 23592, 4160, 0, 24832, 2147483648, 0, 24836, 2147483648, 0, 24840, 2147483648, 0, 24848, 2147483648, 0, 24852, 2147483648, 0, 24856, 2147483648, 0, 24864, 2147483648, 0, 24868, 2147483648, 0, 24872, 2147483648, 0, 25536, 2147483648, 0, 25540, 2147483648, 0, 25544, 2147483648, 0, 25552, 2147483648, 0, 25556, 2147483648, 0, 25560, 2147483648, 0, 25568, 2147483648, 0, 25572, 2147483648, 0, 25576, 2147483648, 0, 26752, 2147483648, 0, 26768, 2147483648, 0, 26784, 2147483648, 0, 27712, 4096, 0, 1280, 9, 0, 1280, 9, 0, 1296, 9, 0, 1296, 9, 0, 10384, 2148007952, 0, 10384, 2148007952, 0, 10384, 2148007952, 0, 10400, 2148007952, 0, 10400, 2148007952, 0, 10400, 2148007952, 0, 10416, 2148007952, 0, 10416, 2148007952, 0, 10416, 2148007952, 0, 11472, 4194304, 0, 11488, 4194304, 0, 11504, 4194304, 0, 13056, 536870912, 0, 14592, 612368384, 0, 14592, 612368384, 0, 14592, 612368384, 0, 15552, 536870912, 0, 18816, 256, 0, 18832, 256, 0, 23552, 4160, 0, 
23552, 4160, 0, 23556, 4160, 0, 23556, 4160, 0, 23560, 4160, 0, 23560, 4160, 0, 23568, 4160, 0, 23568, 4160, 0, 23572, 4160, 0, 23572, 4160, 0, 23576, 4160, 0, 23576, 4160, 0, 23584, 4160, 0, 23584, 4160, 0, 23588, 4160, 0, 23588, 4160, 0, 23592, 4160, 0, 23592, 4160, 0, 24832, 2147483648, 0, 24836, 2147483648, 0, 24840, 2147483648, 0, 24848, 2147483648, 0, 24852, 2147483648, 0, 24856, 2147483648, 0, 24864, 2147483648, 0, 24868, 2147483648, 0, 24872, 2147483648, 0, 25536, 2147483648, 0, 25540, 2147483648, 0, 25544, 2147483648, 0, 25552, 2147483648, 0, 25556, 2147483648, 0, 25560, 2147483648, 0, 25568, 2147483648, 0, 25572, 2147483648, 0, 25576, 2147483648, 0, 26752, 2147483648, 0, 26768, 2147483648, 0, 26784, 2147483648, 0, 27712, 4096, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756401424472046798_714_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756401424472046798_714_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..aafa95db --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756401424472046798_714_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,77 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28))) { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2816, 524288, 0, 2816, 524288, 0] + - Name: _wave_op_index + 
Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756401424688362770_715_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756401424688362770_715_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..875c5926 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756401424688362770_715_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,159 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 4))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 31))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3392, 2097160, 0, 3392, 2097160, 0, 6080, 4, 0, 6096, 4, 0, 6112, 4, 0, 7616, 262148, 0, 7616, 262148, 0, 7632, 262148, 0, 7632, 262148, 0, 7648, 262148, 0, 7648, 262148, 0, 3392, 2097160, 0, 3392, 2097160, 0, 6080, 4, 0, 6096, 4, 0, 6112, 4, 0, 7616, 262148, 0, 7616, 262148, 0, 7632, 262148, 0, 7632, 262148, 0, 7648, 262148, 0, 7648, 262148, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756401491312678855_717_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756401491312678855_717_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e9869f6d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756401491312678855_717_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,105 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1360, 4210696, 0, 1360, 4210696, 0, 1360, 4210696, 0, 1376, 4210696, 0, 1376, 4210696, 0, 1376, 4210696, 0, 1392, 4210696, 0, 1392, 4210696, 0, 1392, 4210696, 0, 2064, 2147483656, 0, 2064, 2147483656, 0, 2080, 2147483656, 0, 2080, 2147483656, 0, 2096, 2147483656, 0, 2096, 2147483656, 0, 2688, 85, 0, 2688, 85, 0, 2688, 85, 0, 2688, 85, 0, 1360, 4210696, 0, 1360, 4210696, 0, 1360, 4210696, 0, 1376, 4210696, 0, 1376, 4210696, 0, 1376, 4210696, 0, 1392, 4210696, 0, 1392, 4210696, 0, 1392, 4210696, 0, 2064, 2147483656, 0, 2064, 2147483656, 0, 2080, 2147483656, 0, 2080, 2147483656, 0, 2096, 2147483656, 0, 2096, 2147483656, 0, 2688, 85, 0, 2688, 85, 0, 2688, 85, 0, 2688, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756401611330117252_719_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756401611330117252_719_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e349d1fc --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756401611330117252_719_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,118 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 21))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 16))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 29))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 # Same size as expected_bit_patterns: no wave op is reachable in this shader, so the buffer stays zero-filled + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756402092277704763_721_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756402092277704763_721_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3903b3a9 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756402092277704763_721_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,375 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) 
|| (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 28)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 30))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((274 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (327 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 306 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2256, 1073742409, 0, 2256, 1073742409, 0, 2256, 1073742409, 0, 2256, 1073742409, 0, 2256, 1073742409, 0, 2260, 1073742409, 0, 2260, 1073742409, 0, 2260, 1073742409, 0, 2260, 1073742409, 0, 2260, 1073742409, 0, 4096, 2181570690, 0, 4096, 2181570690, 0, 4096, 2181570690, 0, 4096, 2181570690, 0, 4096, 2181570690, 0, 4096, 2181570690, 0, 7168, 16644, 0, 7168, 16644, 0, 7168, 16644, 0, 7184, 16644, 0, 7184, 16644, 0, 7184, 16644, 0, 8000, 536870912, 0, 8016, 536870912, 0, 9600, 1342177365, 0, 9600, 1342177365, 0, 9600, 1342177365, 0, 9600, 1342177365, 0, 9600, 1342177365, 0, 9600, 1342177365, 0, 11520, 1073741825, 0, 11520, 1073741825, 0, 11968, 83886080, 0, 11968, 83886080, 0, 13184, 17895680, 0, 13184, 17895680, 0, 13184, 17895680, 0, 13184, 17895680, 0, 13184, 17895680, 0, 13504, 89478400, 0, 13504, 89478400, 0, 13504, 89478400, 0, 13504, 
89478400, 0, 13504, 89478400, 0, 13504, 89478400, 0, 13504, 89478400, 0, 13504, 89478400, 0, 13504, 89478400, 0, 13504, 89478400, 0, 17552, 2, 0, 17568, 2, 0, 2256, 1073742409, 0, 2256, 1073742409, 0, 2256, 1073742409, 0, 2256, 1073742409, 0, 2256, 1073742409, 0, 2260, 1073742409, 0, 2260, 1073742409, 0, 2260, 1073742409, 0, 2260, 1073742409, 0, 2260, 1073742409, 0, 4096, 2181570690, 0, 4096, 2181570690, 0, 4096, 2181570690, 0, 4096, 2181570690, 0, 4096, 2181570690, 0, 4096, 2181570690, 0, 7168, 16644, 0, 7168, 16644, 0, 7168, 16644, 0, 7184, 16644, 0, 7184, 16644, 0, 7184, 16644, 0, 8000, 536870912, 0, 8016, 536870912, 0, 9600, 1342177365, 0, 9600, 1342177365, 0, 9600, 1342177365, 0, 9600, 1342177365, 0, 9600, 1342177365, 0, 9600, 1342177365, 0, 11520, 1073741825, 0, 11520, 1073741825, 0, 11968, 83886080, 0, 11968, 83886080, 0, 13184, 17895680, 0, 13184, 17895680, 0, 13184, 17895680, 0, 13184, 17895680, 0, 13184, 17895680, 0, 13504, 89478400, 0, 13504, 89478400, 0, 13504, 89478400, 0, 13504, 89478400, 0, 13504, 89478400, 0, 13504, 89478400, 0, 13504, 89478400, 0, 13504, 89478400, 0, 13504, 89478400, 0, 13504, 89478400, 0, 17552, 2, 0, 17568, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756402136677989215_722_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756402136677989215_722_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..473f7580 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756402136677989215_722_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,230 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 9)) { + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 
15)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 17)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if 
((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 262144, 0, 1360, 262144, 0, 1984, 341, 0, 1984, 341, 0, 1984, 341, 0, 1984, 341, 0, 1984, 341, 0, 2000, 341, 0, 2000, 341, 0, 2000, 341, 0, 2000, 341, 0, 2000, 341, 0, 3776, 4194368, 0, 3776, 4194368, 0, 3792, 4194368, 0, 3792, 4194368, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 6464, 8256, 0, 6464, 8256, 0, 8960, 272696336, 0, 8960, 272696336, 0, 8960, 272696336, 0, 8960, 272696336, 0, 8960, 272696336, 0, 10192, 131072, 0, 10208, 131072, 0, 10768, 131072, 0, 10784, 131072, 0, 11392, 131072, 0, 1344, 262144, 0, 1360, 262144, 0, 1984, 341, 0, 1984, 341, 0, 1984, 341, 0, 1984, 341, 0, 1984, 341, 0, 2000, 341, 0, 2000, 341, 0, 2000, 341, 0, 2000, 341, 0, 2000, 341, 0, 3776, 4194368, 0, 3776, 4194368, 0, 3792, 4194368, 0, 3792, 4194368, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 7104, 4286578751, 0, 6464, 8256, 0, 6464, 8256, 0, 8960, 
272696336, 0, 8960, 272696336, 0, 8960, 272696336, 0, 8960, 272696336, 0, 8960, 272696336, 0, 10192, 131072, 0, 10208, 131072, 0, 10768, 131072, 0, 10784, 131072, 0, 11392, 131072, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756402147519295486_723_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756402147519295486_723_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4f4df45b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756402147519295486_723_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,77 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2240, 268435456, 0, 3456, 268435456, 0, 2240, 268435456, 0, 3456, 268435456, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756402496011499653_729_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756402496011499653_729_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3f5a2f71 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756402496011499653_729_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,207 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | 
(counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((87 << 6) | (counter0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 16)) { + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2064, 1052672, 0, 2064, 1052672, 0, 4624, 273, 0, 4624, 273, 0, 4624, 273, 0, 5584, 286261248, 0, 5584, 286261248, 0, 5584, 286261248, 0, 5588, 286261248, 0, 5588, 286261248, 0, 5588, 286261248, 0, 6032, 4369, 0, 6032, 4369, 0, 6032, 4369, 0, 6032, 4369, 0, 7248, 16, 0, 9536, 16384, 0, 9984, 559240, 0, 9984, 559240, 0, 9984, 559240, 0, 9984, 559240, 0, 9984, 559240, 0, 2064, 1052672, 0, 2064, 1052672, 0, 4624, 273, 0, 4624, 273, 0, 4624, 273, 0, 5584, 286261248, 0, 5584, 286261248, 0, 5584, 286261248, 0, 5588, 286261248, 0, 5588, 286261248, 0, 5588, 286261248, 0, 6032, 4369, 0, 6032, 4369, 0, 6032, 4369, 0, 6032, 4369, 0, 7248, 16, 0, 9536, 16384, 0, 9984, 559240, 0, 9984, 559240, 0, 9984, 559240, 0, 9984, 559240, 0, 9984, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756402498055574596_730_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756402498055574596_730_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..72dc2490 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756402498055574596_730_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 28)) { + result = (result + 
WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756402499320726234_731_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756402499320726234_731_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cd69fc5d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756402499320726234_731_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,268 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if 
(((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 13))) { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 268435456, 0, 2128, 268435456, 0, 2144, 268435456, 0, 4992, 16, 0, 5008, 16, 0, 5024, 16, 0, 9472, 1074003968, 0, 9472, 1074003968, 0, 12544, 1073741824, 0, 13120, 4195328, 0, 13120, 
4195328, 0, 13440, 67125252, 0, 13440, 67125252, 0, 13440, 67125252, 0, 13888, 559240, 0, 13888, 559240, 0, 13888, 559240, 0, 13888, 559240, 0, 13888, 559240, 0, 2112, 268435456, 0, 2128, 268435456, 0, 2144, 268435456, 0, 4992, 16, 0, 5008, 16, 0, 5024, 16, 0, 9472, 1074003968, 0, 9472, 1074003968, 0, 12544, 1073741824, 0, 13120, 4195328, 0, 13120, 4195328, 0, 13440, 67125252, 0, 13440, 67125252, 0, 13440, 67125252, 0, 13888, 559240, 0, 13888, 559240, 0, 13888, 559240, 0, 13888, 559240, 0, 13888, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756402636880361445_734_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756402636880361445_734_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2d066d0c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756402636880361445_734_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,173 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 29))) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 19))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 28))) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 3))) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 1))) { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3392, 1073741825, 0, 3392, 1073741825, 0, 4608, 262400, 0, 4608, 262400, 0, 10880, 545392672, 0, 10880, 545392672, 0, 10880, 545392672, 0, 10880, 545392672, 0, 10880, 545392672, 0, 3392, 1073741825, 0, 3392, 1073741825, 0, 4608, 262400, 0, 4608, 262400, 0, 10880, 545392672, 0, 10880, 545392672, 0, 10880, 545392672, 0, 10880, 545392672, 0, 10880, 545392672, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756402638471191011_735_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756402638471191011_735_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7c657148 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756402638471191011_735_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,237 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 24)) || 
(WaveGetLaneIndex() == 8))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 21)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute 
+ Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 3456, 1048832, 0, 3456, 1048832, 0, 4352, 1145324612, 0, 4352, 1145324612, 0, 4352, 1145324612, 0, 4352, 1145324612, 0, 4352, 1145324612, 0, 4352, 1145324612, 0, 4352, 1145324612, 0, 4352, 1145324612, 0, 4800, 559240, 0, 4800, 559240, 0, 4800, 559240, 0, 4800, 559240, 0, 4800, 559240, 0, 6592, 268435457, 0, 6592, 268435457, 0, 9984, 2, 0, 10944, 1145324612, 0, 10944, 1145324612, 0, 10944, 1145324612, 0, 10944, 1145324612, 0, 10944, 1145324612, 0, 10944, 1145324612, 0, 10944, 1145324612, 0, 10944, 1145324612, 0, 11392, 559240, 0, 11392, 559240, 0, 11392, 559240, 0, 11392, 559240, 0, 11392, 559240, 0, 768, 1, 0, 3456, 1048832, 0, 3456, 1048832, 0, 4352, 1145324612, 0, 4352, 1145324612, 0, 4352, 1145324612, 0, 4352, 1145324612, 0, 4352, 1145324612, 0, 4352, 1145324612, 0, 4352, 1145324612, 0, 4352, 1145324612, 0, 4800, 559240, 0, 4800, 559240, 0, 4800, 559240, 0, 4800, 559240, 0, 4800, 559240, 0, 6592, 268435457, 0, 6592, 268435457, 0, 9984, 2, 0, 10944, 1145324612, 0, 10944, 1145324612, 0, 10944, 1145324612, 0, 10944, 1145324612, 0, 10944, 1145324612, 0, 10944, 1145324612, 0, 10944, 1145324612, 0, 10944, 1145324612, 0, 11392, 559240, 0, 11392, 559240, 0, 11392, 559240, 0, 11392, 559240, 0, 11392, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756402641923890008_736_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756402641923890008_736_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ad407d52 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756402641923890008_736_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,254 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 20))) { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 10)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 24))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 25)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || 
(WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if 
((WaveGetLaneIndex() >= 17)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((231 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # 
Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4544, 17, 0, 4544, 17, 0, 9536, 33554432, 0, 12608, 131072, 0, 12624, 131072, 0, 15744, 4194304, 0, 15760, 4194304, 0, 16896, 2147483656, 0, 16896, 2147483656, 0, 17600, 2147483656, 0, 17600, 2147483656, 0, 4544, 17, 0, 4544, 17, 0, 9536, 33554432, 0, 12608, 131072, 0, 12624, 131072, 0, 15744, 4194304, 0, 15760, 4194304, 0, 16896, 2147483656, 0, 16896, 2147483656, 0, 17600, 2147483656, 0, 17600, 2147483656, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756402646393957892_737_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756402646393957892_737_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..17c951ae --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756402646393957892_737_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,541 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 29)) { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { 
+ result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 11)) { + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((116 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + continue; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + 
WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((i5 == 1)) { + continue; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 18))) { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((327 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 2)) { + break; + } + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (343 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (350 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((366 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() 
== 24)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((392 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((409 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((428 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((447 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((456 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = 
(result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (460 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (470 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (479 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i8 = 0; (i8 < 3); i8 = (i8 + 1)) { + uint counter9 = 0; + while ((counter9 < 3)) { + counter9 = (counter9 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((513 << 6) | (i8 << 4)) | (counter9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i8 == 1)) { + continue; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (523 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + 
WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (527 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 288 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2768, 536870912, 0, 2772, 536870912, 0, 2776, 536870912, 0, 2784, 536870912, 0, 2788, 536870912, 0, 2792, 536870912, 0, 4288, 1092, 0, 4288, 1092, 0, 4288, 1092, 0, 9920, 71319552, 0, 9920, 71319552, 0, 9920, 71319552, 0, 11136, 1077936128, 0, 11136, 1077936128, 0, 16064, 17, 0, 16064, 17, 0, 16960, 1145324612, 0, 16960, 1145324612, 0, 16960, 1145324612, 0, 16960, 1145324612, 0, 16960, 1145324612, 0, 16960, 1145324612, 0, 16960, 1145324612, 0, 16960, 1145324612, 0, 18240, 2281701376, 0, 18240, 2281701376, 0, 21952, 2290089984, 0, 21952, 2290089984, 0, 21952, 2290089984, 0, 29200, 559240, 0, 29200, 559240, 0, 29200, 559240, 0, 29200, 559240, 0, 29200, 559240, 0, 29216, 559240, 0, 29216, 559240, 0, 29216, 559240, 0, 29216, 559240, 0, 29216, 559240, 0, 30080, 17, 0, 30080, 17, 0, 33472, 559240, 0, 33472, 559240, 0, 33472, 559240, 0, 33472, 559240, 0, 33472, 559240, 0, 576, 17, 0, 576, 17, 0, 2768, 536870912, 0, 2772, 536870912, 0, 2776, 536870912, 0, 2784, 536870912, 0, 2788, 536870912, 0, 2792, 536870912, 0, 4288, 1092, 0, 4288, 1092, 0, 4288, 1092, 0, 9920, 71319552, 0, 9920, 71319552, 0, 9920, 71319552, 0, 11136, 1077936128, 0, 11136, 1077936128, 0, 16064, 17, 0, 16064, 17, 0, 16960, 1145324612, 0, 16960, 1145324612, 0, 16960, 1145324612, 0, 16960, 1145324612, 0, 16960, 1145324612, 0, 16960, 1145324612, 0, 16960, 1145324612, 0, 16960, 1145324612, 0, 18240, 2281701376, 0, 18240, 2281701376, 0, 21952, 
2290089984, 0, 21952, 2290089984, 0, 21952, 2290089984, 0, 29200, 559240, 0, 29200, 559240, 0, 29200, 559240, 0, 29200, 559240, 0, 29200, 559240, 0, 29216, 559240, 0, 29216, 559240, 0, 29216, 559240, 0, 29216, 559240, 0, 29216, 559240, 0, 30080, 17, 0, 30080, 17, 0, 33472, 559240, 0, 33472, 559240, 0, 33472, 559240, 0, 33472, 559240, 0, 33472, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756402719216832501_738_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756402719216832501_738_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ffa7f356 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756402719216832501_738_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,436 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((91 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((98 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 5)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 31))) { + result = (result 
+ WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 24))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((237 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((244 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 29))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (282 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (315 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((337 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((346 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((362 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((369 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 204 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 286261248, 0, 1088, 286261248, 0, 1088, 286261248, 0, 1104, 286261248, 0, 1104, 286261248, 0, 1104, 286261248, 0, 1120, 286261248, 0, 1120, 286261248, 0, 1120, 286261248, 0, 4800, 1140850692, 0, 4800, 1140850692, 0, 4800, 1140850692, 0, 4816, 1140850692, 0, 4816, 1140850692, 0, 4816, 1140850692, 0, 4832, 1140850692, 0, 4832, 1140850692, 0, 4832, 1140850692, 0, 5828, 1073741824, 0, 
5832, 1073741824, 0, 5844, 1073741824, 0, 5848, 1073741824, 0, 5860, 1073741824, 0, 5864, 1073741824, 0, 8448, 8, 0, 9408, 2147483648, 0, 11392, 559232, 0, 11392, 559232, 0, 11392, 559232, 0, 11392, 559232, 0, 16064, 134217728, 0, 16512, 33554432, 0, 18048, 1, 0, 18624, 1, 0, 1088, 286261248, 0, 1088, 286261248, 0, 1088, 286261248, 0, 1104, 286261248, 0, 1104, 286261248, 0, 1104, 286261248, 0, 1120, 286261248, 0, 1120, 286261248, 0, 1120, 286261248, 0, 4800, 1140850692, 0, 4800, 1140850692, 0, 4800, 1140850692, 0, 4816, 1140850692, 0, 4816, 1140850692, 0, 4816, 1140850692, 0, 4832, 1140850692, 0, 4832, 1140850692, 0, 4832, 1140850692, 0, 5828, 1073741824, 0, 5832, 1073741824, 0, 5844, 1073741824, 0, 5848, 1073741824, 0, 5860, 1073741824, 0, 5864, 1073741824, 0, 8448, 8, 0, 9408, 2147483648, 0, 11392, 559232, 0, 11392, 559232, 0, 11392, 559232, 0, 11392, 559232, 0, 16064, 134217728, 0, 16512, 33554432, 0, 18048, 1, 0, 18624, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756402862344815567_739_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756402862344815567_739_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5a14b0a0 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756402862344815567_739_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,118 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 29))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 31)) { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756402862516441794_740_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756402862516441794_740_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..36a9d7fe --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756402862516441794_740_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,366 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 22)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 31)) { + if 
((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 28)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 26)) { + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || 
(WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((285 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((307 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((325 << 6) | (counter3 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((332 << 6) | (counter3 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((339 << 6) | (counter3 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i5 == 1)) { + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((351 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1140 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4928, 1145324612, 0, 4928, 1145324612, 0, 4928, 1145324612, 0, 4928, 1145324612, 0, 4928, 1145324612, 0, 4928, 1145324612, 0, 4928, 1145324612, 0, 4928, 1145324612, 0, 6272, 2281701376, 0, 6272, 2281701376, 0, 6288, 2281701376, 0, 6288, 2281701376, 0, 7744, 2290089992, 0, 7744, 2290089992, 0, 7744, 2290089992, 0, 7744, 2290089992, 0, 7760, 2290089992, 0, 7760, 2290089992, 0, 7760, 2290089992, 0, 7760, 2290089992, 0, 13376, 41943056, 0, 13376, 41943056, 0, 13376, 41943056, 0, 13392, 41943056, 0, 13392, 41943056, 0, 13392, 41943056, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 
2863311530, 0, 14416, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 20816, 61, 0, 20816, 61, 0, 20816, 61, 0, 20816, 61, 0, 20816, 61, 0, 20820, 61, 0, 20820, 61, 0, 20820, 61, 0, 20820, 61, 0, 20820, 61, 0, 20832, 61, 0, 20832, 61, 0, 20832, 61, 0, 20832, 61, 0, 20832, 61, 0, 20836, 61, 0, 20836, 61, 0, 20836, 61, 0, 20836, 61, 0, 20836, 61, 0, 20848, 61, 0, 20848, 61, 0, 20848, 61, 0, 20848, 61, 0, 20848, 61, 0, 20852, 61, 0, 20852, 61, 0, 20852, 61, 0, 20852, 61, 0, 20852, 61, 0, 21264, 61, 0, 21264, 61, 0, 21264, 61, 0, 21264, 61, 0, 21264, 61, 0, 21268, 61, 0, 21268, 61, 0, 21268, 61, 0, 21268, 61, 0, 21268, 61, 0, 21280, 61, 0, 21280, 61, 0, 21280, 61, 0, 21280, 61, 0, 21280, 61, 0, 21284, 61, 0, 21284, 61, 0, 21284, 61, 0, 21284, 61, 0, 21284, 61, 0, 21296, 61, 0, 21296, 61, 0, 21296, 61, 0, 21296, 61, 0, 21296, 61, 0, 21300, 61, 0, 21300, 61, 0, 21300, 61, 0, 21300, 61, 0, 21300, 61, 0, 21712, 128, 0, 21716, 128, 0, 21728, 128, 0, 21732, 128, 0, 21744, 128, 0, 21748, 128, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 
0, 22480, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 576, 17, 0, 576, 17, 0, 4928, 1145324612, 0, 4928, 1145324612, 0, 4928, 1145324612, 0, 4928, 1145324612, 0, 4928, 1145324612, 0, 4928, 1145324612, 0, 4928, 1145324612, 0, 4928, 1145324612, 0, 6272, 2281701376, 0, 6272, 2281701376, 0, 6288, 2281701376, 0, 6288, 2281701376, 0, 7744, 2290089992, 0, 7744, 2290089992, 0, 7744, 2290089992, 0, 7744, 2290089992, 0, 7760, 2290089992, 0, 7760, 2290089992, 0, 7760, 2290089992, 0, 7760, 2290089992, 0, 13376, 41943056, 0, 13376, 41943056, 0, 13376, 41943056, 0, 13392, 41943056, 0, 13392, 41943056, 0, 13392, 41943056, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14416, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 14432, 2863311530, 0, 
14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 14448, 2863311530, 0, 20816, 61, 0, 20816, 61, 0, 20816, 61, 0, 20816, 61, 0, 20816, 61, 0, 20820, 61, 0, 20820, 61, 0, 20820, 61, 0, 20820, 61, 0, 20820, 61, 0, 20832, 61, 0, 20832, 61, 0, 20832, 61, 0, 20832, 61, 0, 20832, 61, 0, 20836, 61, 0, 20836, 61, 0, 20836, 61, 0, 20836, 61, 0, 20836, 61, 0, 20848, 61, 0, 20848, 61, 0, 20848, 61, 0, 20848, 61, 0, 20848, 61, 0, 20852, 61, 0, 20852, 61, 0, 20852, 61, 0, 20852, 61, 0, 20852, 61, 0, 21264, 61, 0, 21264, 61, 0, 21264, 61, 0, 21264, 61, 0, 21264, 61, 0, 21268, 61, 0, 21268, 61, 0, 21268, 61, 0, 21268, 61, 0, 21268, 61, 0, 21280, 61, 0, 21280, 61, 0, 21280, 61, 0, 21280, 61, 0, 21280, 61, 0, 21284, 61, 0, 21284, 61, 0, 21284, 61, 0, 21284, 61, 0, 21284, 61, 0, 21296, 61, 0, 21296, 61, 0, 21296, 61, 0, 21296, 61, 0, 21296, 61, 0, 21300, 61, 0, 21300, 61, 0, 21300, 61, 0, 21300, 61, 0, 21300, 61, 0, 21712, 128, 0, 21716, 128, 0, 21728, 128, 0, 21732, 128, 0, 21744, 128, 0, 21748, 128, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22480, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22496, 1431655765, 0, 22512, 
1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0, 22512, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756402965720180180_741_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756402965720180180_741_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..29e42d45 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756402965720180180_741_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,215 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: 
{ + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((34 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((counter1 == 1)) { + break; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 11)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4032, 73, 0, 4032, 73, 0, 4032, 73, 0, 4944, 524288, 0, 4960, 524288, 0, 6032, 2, 0, 6048, 2, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4032, 73, 0, 4032, 73, 0, 4032, 73, 0, 4944, 524288, 0, 4960, 524288, 0, 6032, 2, 0, 6048, 2, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0, 7424, 3067833782, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756402989565748477_742_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756402989565748477_742_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..468d0985 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756402989565748477_742_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,183 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((128 << 6) | (counter0 << 4)) | (i1 
<< 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 3648, 272696336, 0, 3648, 272696336, 0, 3648, 272696336, 0, 3648, 272696336, 0, 3648, 272696336, 0, 8208, 4210708, 0, 8208, 4210708, 0, 8208, 4210708, 0, 8208, 4210708, 0, 8212, 4210708, 0, 8212, 4210708, 0, 8212, 4210708, 0, 8212, 4210708, 0, 9168, 1048576, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 3648, 272696336, 0, 3648, 272696336, 0, 3648, 272696336, 0, 3648, 272696336, 0, 3648, 272696336, 0, 8208, 4210708, 0, 8208, 4210708, 0, 8208, 4210708, 0, 8208, 4210708, 0, 8212, 4210708, 0, 8212, 4210708, 0, 8212, 4210708, 0, 8212, 4210708, 0, 9168, 1048576, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756402999542770151_744_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756402999542770151_744_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dccaf5fa --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756402999542770151_744_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 2))) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 276 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 
4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 576, 17, 0, 576, 17, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4416, 2004318071, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0, 4864, 1048575, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756403000179732155_745_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756403000179732155_745_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..413e7f7e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756403000179732155_745_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,222 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 29))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 22))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 11))) { + 
result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((192 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } else { + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 14))) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((251 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 25)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (i2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((283 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((298 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 1)) { + break; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 536870914, 0, 1856, 536870914, 0, 6592, 536870912, 0, 9216, 16777216, 0, 9232, 16777216, 0, 9248, 16777216, 0, 11136, 1048576, 0, 11152, 1048576, 0, 11168, 1048576, 0, 12864, 2147483648, 0, 1856, 536870914, 0, 1856, 536870914, 0, 6592, 536870912, 0, 9216, 16777216, 0, 9232, 16777216, 0, 9248, 16777216, 0, 11136, 1048576, 0, 11152, 1048576, 0, 11168, 1048576, 0, 12864, 2147483648, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756403165495067553_748_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756403165495067553_748_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8a634e01 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756403165495067553_748_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,156 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + uint counter1 = 0; + while 
((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 64, 0, 912, 64, 0, 928, 64, 0, 1344, 4096, 0, 1360, 4096, 0, 1376, 4096, 0, 3024, 135266820, 0, 3024, 135266820, 0, 3024, 135266820, 0, 3024, 135266820, 0, 3040, 135266820, 0, 3040, 135266820, 0, 3040, 135266820, 0, 3040, 135266820, 0, 3056, 135266820, 0, 3056, 135266820, 0, 3056, 135266820, 0, 3056, 135266820, 0, 4944, 65536, 0, 4960, 65536, 0, 4976, 65536, 0, 10512, 536870916, 0, 10512, 536870916, 0, 10528, 536870916, 0, 10528, 536870916, 0, 10544, 536870916, 0, 10544, 536870916, 0, 896, 64, 0, 912, 64, 0, 928, 64, 0, 1344, 4096, 0, 1360, 4096, 0, 1376, 4096, 0, 3024, 
135266820, 0, 3024, 135266820, 0, 3024, 135266820, 0, 3024, 135266820, 0, 3040, 135266820, 0, 3040, 135266820, 0, 3040, 135266820, 0, 3040, 135266820, 0, 3056, 135266820, 0, 3056, 135266820, 0, 3056, 135266820, 0, 3056, 135266820, 0, 4944, 65536, 0, 4960, 65536, 0, 4976, 65536, 0, 10512, 536870916, 0, 10512, 536870916, 0, 10528, 536870916, 0, 10528, 536870916, 0, 10544, 536870916, 0, 10544, 536870916, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756403239200281039_750_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756403239200281039_750_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..027384b7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756403239200281039_750_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,283 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() 
== 29)) || (WaveGetLaneIndex() == 20))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 22))) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 25))) { + result = (result 
+ WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2048, 17, 0, 2048, 17, 0, 3456, 512, 0, 3472, 512, 0, 10624, 67125252, 0, 10624, 67125252, 0, 10624, 67125252, 0, 11264, 8, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2048, 17, 0, 2048, 17, 0, 3456, 512, 0, 3472, 512, 0, 10624, 67125252, 0, 10624, 67125252, 0, 10624, 67125252, 0, 11264, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756403243399135529_751_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756403243399135529_751_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e14ae8a7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756403243399135529_751_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,242 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() >= 24)) { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((119 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() 
>= 31)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 18))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 15)) { + if ((WaveGetLaneIndex() < 7)) { + result = 
(result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1426063360, 0, 768, 1426063360, 0, 768, 1426063360, 0, 768, 1426063360, 0, 5184, 1409286144, 0, 5184, 1409286144, 0, 5184, 1409286144, 0, 7636, 16640, 0, 7636, 16640, 0, 7640, 16640, 0, 7640, 16640, 0, 7644, 16640, 0, 7644, 16640, 0, 768, 1426063360, 0, 768, 1426063360, 0, 768, 1426063360, 0, 768, 1426063360, 0, 5184, 1409286144, 0, 5184, 1409286144, 0, 5184, 1409286144, 0, 7636, 16640, 0, 7636, 16640, 0, 7640, 16640, 0, 7640, 16640, 0, 7644, 16640, 0, 7644, 16640, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756403273287809138_753_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756403273287809138_753_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7fce0983 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756403273287809138_753_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,351 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 30))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = 
(result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 11)) { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { 
+ if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 16)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((298 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 16384, 8, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 6592, 1431655765, 0, 16384, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756403389690409077_756_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756403389690409077_756_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0f29a404 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756403389690409077_756_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,223 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 14)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((100 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 29)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 9))) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 426 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 31, 0, 576, 31, 0, 576, 31, 0, 576, 31, 0, 576, 31, 0, 1488, 63, 0, 1488, 63, 0, 1488, 63, 0, 1488, 63, 0, 1488, 63, 0, 1488, 63, 0, 1504, 63, 0, 1504, 63, 0, 1504, 63, 0, 1504, 63, 0, 1504, 63, 0, 1504, 63, 0, 3412, 576, 0, 3412, 576, 0, 3416, 576, 0, 3416, 576, 0, 3428, 576, 0, 3428, 576, 0, 3432, 576, 0, 3432, 576, 0, 4628, 2049, 0, 4628, 2049, 0, 4632, 2049, 0, 4632, 2049, 0, 4644, 2049, 0, 4644, 2049, 0, 4648, 2049, 0, 4648, 2049, 0, 6420, 285212689, 0, 6420, 285212689, 0, 6420, 285212689, 0, 6420, 285212689, 0, 6424, 285212689, 0, 6424, 285212689, 0, 6424, 285212689, 0, 6424, 285212689, 0, 6436, 285212689, 0, 6436, 285212689, 0, 6436, 285212689, 0, 6436, 285212689, 0, 6440, 285212689, 0, 6440, 285212689, 0, 6440, 285212689, 0, 6440, 285212689, 0, 6452, 285212689, 0, 6452, 285212689, 0, 6452, 285212689, 0, 6452, 285212689, 0, 6456, 285212689, 0, 6456, 285212689, 0, 6456, 285212689, 0, 6456, 285212689, 0, 8576, 2, 0, 11072, 1145324612, 0, 11072, 1145324612, 0, 11072, 1145324612, 0, 11072, 1145324612, 0, 11072, 1145324612, 0, 11072, 1145324612, 0, 11072, 1145324612, 0, 11072, 1145324612, 0, 11520, 559240, 0, 11520, 559240, 0, 11520, 559240, 0, 11520, 559240, 0, 11520, 559240, 0, 576, 31, 0, 576, 31, 0, 576, 31, 0, 576, 31, 0, 576, 31, 0, 1488, 63, 0, 1488, 63, 0, 1488, 63, 0, 1488, 63, 0, 1488, 63, 0, 1488, 63, 0, 1504, 63, 0, 1504, 63, 0, 1504, 63, 0, 1504, 63, 0, 1504, 63, 0, 1504, 63, 0, 3412, 576, 0, 3412, 576, 0, 3416, 576, 0, 3416, 576, 0, 3428, 576, 0, 3428, 576, 0, 3432, 576, 0, 3432, 576, 0, 4628, 2049, 0, 4628, 2049, 0, 4632, 2049, 0, 4632, 2049, 0, 4644, 2049, 0, 4644, 2049, 0, 4648, 2049, 0, 
4648, 2049, 0, 6420, 285212689, 0, 6420, 285212689, 0, 6420, 285212689, 0, 6420, 285212689, 0, 6424, 285212689, 0, 6424, 285212689, 0, 6424, 285212689, 0, 6424, 285212689, 0, 6436, 285212689, 0, 6436, 285212689, 0, 6436, 285212689, 0, 6436, 285212689, 0, 6440, 285212689, 0, 6440, 285212689, 0, 6440, 285212689, 0, 6440, 285212689, 0, 6452, 285212689, 0, 6452, 285212689, 0, 6452, 285212689, 0, 6452, 285212689, 0, 6456, 285212689, 0, 6456, 285212689, 0, 6456, 285212689, 0, 6456, 285212689, 0, 8576, 2, 0, 11072, 1145324612, 0, 11072, 1145324612, 0, 11072, 1145324612, 0, 11072, 1145324612, 0, 11072, 1145324612, 0, 11072, 1145324612, 0, 11072, 1145324612, 0, 11072, 1145324612, 0, 11520, 559240, 0, 11520, 559240, 0, 11520, 559240, 0, 11520, 559240, 0, 11520, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756403445855280998_757_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756403445855280998_757_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..94f7ff14 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756403445855280998_757_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,340 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 20))) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 27)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((97 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if ((WaveGetLaneIndex() >= 25)) { + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((172 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((189 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 5))) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (266 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((329 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((346 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((365 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((374 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((383 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 420 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6212, 64, 0, 6216, 64, 0, 6220, 64, 0, 6228, 64, 0, 6232, 64, 0, 6236, 64, 0, 6244, 64, 0, 6248, 64, 0, 6252, 64, 0, 6656, 1090781184, 0, 6656, 1090781184, 0, 6656, 1090781184, 0, 6672, 1090781184, 0, 6672, 1090781184, 0, 6672, 1090781184, 0, 6688, 1090781184, 0, 6688, 1090781184, 0, 6688, 1090781184, 0, 7872, 3657433088, 0, 7872, 3657433088, 0, 7872, 3657433088, 0, 7872, 3657433088, 0, 7872, 3657433088, 0, 13376, 16, 0, 14016, 73, 0, 14016, 73, 0, 14016, 73, 0, 14592, 4260880, 0, 14592, 4260880, 0, 14592, 4260880, 0, 14592, 4260880, 0, 18944, 4096, 0, 21060, 603979776, 0, 21060, 603979776, 0, 21064, 603979776, 0, 21064, 603979776, 0, 21076, 603979776, 0, 21076, 603979776, 0, 21080, 603979776, 0, 21080, 603979776, 0, 23940, 545392672, 0, 23940, 545392672, 0, 23940, 545392672, 0, 23940, 545392672, 0, 23940, 545392672, 0, 23944, 545392672, 0, 23944, 545392672, 0, 23944, 545392672, 0, 23944, 545392672, 0, 23944, 545392672, 0, 23956, 545392672, 0, 23956, 545392672, 0, 23956, 545392672, 0, 23956, 545392672, 0, 23956, 545392672, 0, 23960, 545392672, 0, 23960, 545392672, 0, 23960, 
545392672, 0, 23960, 545392672, 0, 23960, 545392672, 0, 24512, 68174084, 0, 24512, 68174084, 0, 24512, 68174084, 0, 24512, 68174084, 0, 24512, 68174084, 0, 24528, 68174084, 0, 24528, 68174084, 0, 24528, 68174084, 0, 24528, 68174084, 0, 24528, 68174084, 0, 6212, 64, 0, 6216, 64, 0, 6220, 64, 0, 6228, 64, 0, 6232, 64, 0, 6236, 64, 0, 6244, 64, 0, 6248, 64, 0, 6252, 64, 0, 6656, 1090781184, 0, 6656, 1090781184, 0, 6656, 1090781184, 0, 6672, 1090781184, 0, 6672, 1090781184, 0, 6672, 1090781184, 0, 6688, 1090781184, 0, 6688, 1090781184, 0, 6688, 1090781184, 0, 7872, 3657433088, 0, 7872, 3657433088, 0, 7872, 3657433088, 0, 7872, 3657433088, 0, 7872, 3657433088, 0, 13376, 16, 0, 14016, 73, 0, 14016, 73, 0, 14016, 73, 0, 14592, 4260880, 0, 14592, 4260880, 0, 14592, 4260880, 0, 14592, 4260880, 0, 18944, 4096, 0, 21060, 603979776, 0, 21060, 603979776, 0, 21064, 603979776, 0, 21064, 603979776, 0, 21076, 603979776, 0, 21076, 603979776, 0, 21080, 603979776, 0, 21080, 603979776, 0, 23940, 545392672, 0, 23940, 545392672, 0, 23940, 545392672, 0, 23940, 545392672, 0, 23940, 545392672, 0, 23944, 545392672, 0, 23944, 545392672, 0, 23944, 545392672, 0, 23944, 545392672, 0, 23944, 545392672, 0, 23956, 545392672, 0, 23956, 545392672, 0, 23956, 545392672, 0, 23956, 545392672, 0, 23956, 545392672, 0, 23960, 545392672, 0, 23960, 545392672, 0, 23960, 545392672, 0, 23960, 545392672, 0, 23960, 545392672, 0, 24512, 68174084, 0, 24512, 68174084, 0, 24512, 68174084, 0, 24512, 68174084, 0, 24512, 68174084, 0, 24528, 68174084, 0, 24528, 68174084, 0, 24528, 68174084, 0, 24528, 68174084, 0, 24528, 68174084, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: 
_wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756403851202525172_760_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756403851202525172_760_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..14b50226 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756403851202525172_760_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,285 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 19)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result 
= (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 12)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 
2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3392, 73, 0, 3392, 73, 0, 3392, 73, 0, 5120, 136348168, 0, 5120, 136348168, 0, 5120, 136348168, 0, 5120, 136348168, 0, 5120, 136348168, 0, 5760, 2, 0, 6400, 16, 0, 7424, 2, 0, 8000, 2181570560, 0, 8000, 2181570560, 0, 8000, 2181570560, 0, 8000, 2181570560, 0, 9536, 613566756, 0, 9536, 613566756, 0, 9536, 613566756, 0, 9536, 613566756, 0, 9536, 613566756, 0, 9536, 613566756, 0, 9536, 613566756, 0, 9536, 
613566756, 0, 9536, 613566756, 0, 9536, 613566756, 0, 3392, 73, 0, 3392, 73, 0, 3392, 73, 0, 5120, 136348168, 0, 5120, 136348168, 0, 5120, 136348168, 0, 5120, 136348168, 0, 5120, 136348168, 0, 5760, 2, 0, 6400, 16, 0, 7424, 2, 0, 8000, 2181570560, 0, 8000, 2181570560, 0, 8000, 2181570560, 0, 8000, 2181570560, 0, 9536, 613566756, 0, 9536, 613566756, 0, 9536, 613566756, 0, 9536, 613566756, 0, 9536, 613566756, 0, 9536, 613566756, 0, 9536, 613566756, 0, 9536, 613566756, 0, 9536, 613566756, 0, 9536, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756403853666930137_761_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756403853666930137_761_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8ba3549a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756403853666930137_761_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,320 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 18))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() < 13)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((207 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((218 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((235 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1680, 4194304, 0, 1696, 4194304, 0, 3520, 68174084, 0, 3520, 68174084, 0, 3520, 68174084, 0, 3520, 68174084, 0, 3520, 68174084, 0, 5504, 559240, 0, 5504, 559240, 0, 5504, 559240, 0, 5504, 559240, 0, 5504, 559240, 0, 12112, 1073741824, 0, 13968, 1140850688, 0, 13968, 1140850688, 0, 13972, 1140850688, 0, 13972, 1140850688, 0, 15680, 559240, 0, 15680, 559240, 0, 15680, 559240, 0, 15680, 559240, 0, 15680, 559240, 0, 768, 65, 0, 768, 65, 0, 1680, 4194304, 0, 1696, 4194304, 0, 3520, 68174084, 0, 3520, 68174084, 0, 3520, 68174084, 0, 3520, 68174084, 0, 3520, 68174084, 0, 5504, 559240, 0, 5504, 559240, 0, 5504, 559240, 0, 5504, 559240, 0, 5504, 559240, 0, 12112, 1073741824, 0, 13968, 1140850688, 0, 13968, 1140850688, 0, 13972, 1140850688, 0, 13972, 1140850688, 0, 15680, 559240, 0, 15680, 559240, 0, 15680, 559240, 0, 15680, 559240, 0, 15680, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756403898571076056_762_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756403898571076056_762_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d5e1c15c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756403898571076056_762_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,431 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (counter1 << 2)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 17)) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((166 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((180 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((201 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((208 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 29))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((258 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((277 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (286 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((306 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((315 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 2)) { + break; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 27))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (350 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (360 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (367 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (371 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (423 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (419 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (413 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (409 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 378 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2000, 4096, 0, 2016, 4096, 0, 2032, 4096, 0, 9728, 1145324612, 0, 
9728, 1145324612, 0, 9728, 1145324612, 0, 9728, 1145324612, 0, 9728, 1145324612, 0, 9728, 1145324612, 0, 9728, 1145324612, 0, 9728, 1145324612, 0, 9744, 1145324612, 0, 9744, 1145324612, 0, 9744, 1145324612, 0, 9744, 1145324612, 0, 9744, 1145324612, 0, 9744, 1145324612, 0, 9744, 1145324612, 0, 9744, 1145324612, 0, 12868, 17408, 0, 12868, 17408, 0, 12872, 17408, 0, 12872, 17408, 0, 12884, 17408, 0, 12884, 17408, 0, 12888, 17408, 0, 12888, 17408, 0, 13760, 559240, 0, 13760, 559240, 0, 13760, 559240, 0, 13760, 559240, 0, 13760, 559240, 0, 16512, 2, 0, 16528, 2, 0, 17728, 2, 0, 17732, 2, 0, 17736, 2, 0, 17744, 2, 0, 17748, 2, 0, 17752, 2, 0, 18304, 8192, 0, 23488, 557192, 0, 23488, 557192, 0, 23488, 557192, 0, 23488, 557192, 0, 27072, 3758096415, 0, 27072, 3758096415, 0, 27072, 3758096415, 0, 27072, 3758096415, 0, 27072, 3758096415, 0, 27072, 3758096415, 0, 27072, 3758096415, 0, 27072, 3758096415, 0, 26816, 268435456, 0, 26432, 2105856, 0, 26432, 2105856, 0, 26432, 2105856, 0, 26176, 264241152, 0, 26176, 264241152, 0, 26176, 264241152, 0, 26176, 264241152, 0, 26176, 264241152, 0, 26176, 264241152, 0, 2000, 4096, 0, 2016, 4096, 0, 2032, 4096, 0, 9728, 1145324612, 0, 9728, 1145324612, 0, 9728, 1145324612, 0, 9728, 1145324612, 0, 9728, 1145324612, 0, 9728, 1145324612, 0, 9728, 1145324612, 0, 9728, 1145324612, 0, 9744, 1145324612, 0, 9744, 1145324612, 0, 9744, 1145324612, 0, 9744, 1145324612, 0, 9744, 1145324612, 0, 9744, 1145324612, 0, 9744, 1145324612, 0, 9744, 1145324612, 0, 12868, 17408, 0, 12868, 17408, 0, 12872, 17408, 0, 12872, 17408, 0, 12884, 17408, 0, 12884, 17408, 0, 12888, 17408, 0, 12888, 17408, 0, 13760, 559240, 0, 13760, 559240, 0, 13760, 559240, 0, 13760, 559240, 0, 13760, 559240, 0, 16512, 2, 0, 16528, 2, 0, 17728, 2, 0, 17732, 2, 0, 17736, 2, 0, 17744, 2, 0, 17748, 2, 0, 17752, 2, 0, 18304, 8192, 0, 23488, 557192, 0, 23488, 557192, 0, 23488, 557192, 0, 23488, 557192, 0, 27072, 3758096415, 0, 27072, 3758096415, 0, 27072, 3758096415, 0, 27072, 3758096415, 0, 
27072, 3758096415, 0, 27072, 3758096415, 0, 27072, 3758096415, 0, 27072, 3758096415, 0, 26816, 268435456, 0, 26432, 2105856, 0, 26432, 2105856, 0, 26432, 2105856, 0, 26176, 264241152, 0, 26176, 264241152, 0, 26176, 264241152, 0, 26176, 264241152, 0, 26176, 264241152, 0, 26176, 264241152, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756404066584512650_763_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756404066584512650_763_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0670da88 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756404066584512650_763_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,141 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((68 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 720 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 
1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 4368, 512, 0, 4372, 512, 0, 4376, 512, 0, 4384, 512, 0, 4388, 512, 0, 4392, 512, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3280, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3284, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 
1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3288, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3296, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3300, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 3304, 1431655765, 0, 4368, 512, 0, 4372, 512, 0, 4376, 512, 0, 4384, 512, 0, 4388, 512, 0, 4392, 512, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756404162095190674_765_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756404162095190674_765_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1257acde --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756404162095190674_765_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,358 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter1 
= 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 25))) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 
21))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 21)) { + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 18))) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((268 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((283 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 15)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((315 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (333 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while 
((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((351 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((366 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2256, 1048576, 0, 2272, 1048576, 0, 3920, 131072, 0, 3936, 131072, 0, 3952, 131072, 0, 5136, 2, 0, 5152, 2, 0, 5168, 2, 0, 6608, 32, 0, 6624, 32, 0, 6640, 32, 0, 10112, 559240, 0, 10112, 559240, 0, 10112, 559240, 0, 10112, 559240, 0, 10112, 559240, 0, 10752, 85, 0, 10752, 85, 0, 10752, 85, 0, 10752, 85, 0, 12416, 1090785345, 0, 12416, 1090785345, 0, 12416, 1090785345, 0, 12416, 1090785345, 0, 12416, 1090785345, 0, 12416, 1090785345, 0, 12864, 136314880, 0, 12864, 136314880, 0, 14400, 520, 0, 14400, 520, 0, 18128, 268443648, 0, 18128, 268443648, 0, 18144, 268443648, 0, 18144, 268443648, 0, 21312, 536870912, 0, 22480, 8388608, 0, 22496, 8388608, 0, 2256, 1048576, 0, 2272, 1048576, 0, 3920, 131072, 0, 3936, 131072, 0, 3952, 131072, 0, 5136, 2, 0, 5152, 2, 0, 5168, 2, 0, 6608, 32, 0, 6624, 32, 0, 6640, 32, 0, 10112, 559240, 0, 10112, 559240, 0, 10112, 559240, 0, 10112, 559240, 0, 
10112, 559240, 0, 10752, 85, 0, 10752, 85, 0, 10752, 85, 0, 10752, 85, 0, 12416, 1090785345, 0, 12416, 1090785345, 0, 12416, 1090785345, 0, 12416, 1090785345, 0, 12416, 1090785345, 0, 12416, 1090785345, 0, 12864, 136314880, 0, 12864, 136314880, 0, 14400, 520, 0, 14400, 520, 0, 18128, 268443648, 0, 18128, 268443648, 0, 18144, 268443648, 0, 18144, 268443648, 0, 21312, 536870912, 0, 22480, 8388608, 0, 22496, 8388608, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756404233796292451_766_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756404233796292451_766_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..52017690 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756404233796292451_766_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,98 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 23))) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((39 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } + if ((i0 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 2147483648, 0, 912, 2147483648, 0, 928, 2147483648, 0, 896, 2147483648, 0, 912, 2147483648, 0, 928, 2147483648, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756404613350464965_768_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756404613350464965_768_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87ab0cee --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756404613350464965_768_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,202 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((85 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 2)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 618 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1216, 1, 0, 2896, 33619968, 0, 2896, 33619968, 0, 2912, 33619968, 0, 2912, 33619968, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5464, 1431655765, 0, 5464, 
1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 6272, 85, 0, 6272, 85, 0, 6272, 85, 0, 6272, 85, 0, 7488, 85, 0, 7488, 85, 0, 7488, 85, 0, 7488, 85, 0, 576, 17, 0, 576, 17, 0, 1216, 1, 0, 2896, 33619968, 0, 2896, 33619968, 0, 2912, 33619968, 0, 2912, 33619968, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 3840, 2004318071, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 5460, 1431655765, 0, 
5460, 1431655765, 0, 5460, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5464, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5476, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 5480, 1431655765, 0, 6272, 85, 0, 6272, 85, 0, 6272, 85, 0, 6272, 85, 0, 7488, 85, 0, 7488, 85, 0, 7488, 85, 0, 7488, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756404641384913850_769_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756404641384913850_769_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..01c4bc9c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756404641384913850_769_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,99 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 7))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 15)) { + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 67125248, 0, 2112, 67125248, 0, 2112, 67125248, 0, 2112, 67125248, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756404641593503290_770_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756404641593503290_770_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2a48b54a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756404641593503290_770_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,513 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 25)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 5))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 24))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 24))) { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() 
+ 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((296 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (307 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (330 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (334 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (344 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (360 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 27)) { + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((377 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((384 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (389 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (396 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 336 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 4608, 2290649224, 0, 4608, 2290649224, 0, 4608, 2290649224, 0, 4608, 2290649224, 0, 4608, 2290649224, 0, 4608, 2290649224, 0, 4608, 2290649224, 0, 4608, 2290649224, 0, 6592, 559240, 0, 6592, 559240, 0, 6592, 559240, 0, 6592, 559240, 0, 6592, 559240, 0, 8064, 17, 0, 8064, 17, 0, 12800, 537002016, 0, 12800, 537002016, 0, 12800, 537002016, 0, 19968, 1145324612, 0, 19968, 1145324612, 0, 19968, 1145324612, 0, 19968, 1145324612, 0, 19968, 1145324612, 0, 19968, 1145324612, 0, 19968, 1145324612, 0, 19968, 1145324612, 0, 21120, 559240, 0, 21120, 559240, 0, 21120, 559240, 0, 21120, 559240, 0, 21120, 559240, 0, 22016, 17, 0, 22016, 17, 0, 24896, 1145324612, 0, 24896, 1145324612, 0, 24896, 1145324612, 0, 24896, 1145324612, 0, 24896, 1145324612, 0, 24896, 1145324612, 0, 24896, 1145324612, 0, 24896, 1145324612, 0, 25344, 559240, 0, 25344, 559240, 0, 25344, 559240, 0, 25344, 559240, 0, 25344, 559240, 0, 576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 
1145324612, 0, 4608, 2290649224, 0, 4608, 2290649224, 0, 4608, 2290649224, 0, 4608, 2290649224, 0, 4608, 2290649224, 0, 4608, 2290649224, 0, 4608, 2290649224, 0, 4608, 2290649224, 0, 6592, 559240, 0, 6592, 559240, 0, 6592, 559240, 0, 6592, 559240, 0, 6592, 559240, 0, 8064, 17, 0, 8064, 17, 0, 12800, 537002016, 0, 12800, 537002016, 0, 12800, 537002016, 0, 19968, 1145324612, 0, 19968, 1145324612, 0, 19968, 1145324612, 0, 19968, 1145324612, 0, 19968, 1145324612, 0, 19968, 1145324612, 0, 19968, 1145324612, 0, 19968, 1145324612, 0, 21120, 559240, 0, 21120, 559240, 0, 21120, 559240, 0, 21120, 559240, 0, 21120, 559240, 0, 22016, 17, 0, 22016, 17, 0, 24896, 1145324612, 0, 24896, 1145324612, 0, 24896, 1145324612, 0, 24896, 1145324612, 0, 24896, 1145324612, 0, 24896, 1145324612, 0, 24896, 1145324612, 0, 24896, 1145324612, 0, 25344, 559240, 0, 25344, 559240, 0, 25344, 559240, 0, 25344, 559240, 0, 25344, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756404650811766345_771_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756404650811766345_771_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3911c615 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756404650811766345_771_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,97 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 11))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2320, 131072, 0, 2336, 131072, 0, 2320, 131072, 0, 2336, 131072, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756404651127044390_772_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756404651127044390_772_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bd7ce28c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756404651127044390_772_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,312 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 26))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((124 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 8) || 
(WaveGetLaneIndex() == 16))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((218 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 31)) { + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 21)) || 
(WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 342 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 4227858447, 0, 1088, 4227858447, 0, 1088, 4227858447, 0, 1088, 4227858447, 0, 1088, 4227858447, 0, 1088, 4227858447, 0, 1088, 4227858447, 0, 1088, 4227858447, 0, 1088, 4227858447, 0, 1088, 4227858447, 0, 2896, 268435457, 0, 
2896, 268435457, 0, 2912, 268435457, 0, 2912, 268435457, 0, 4048, 3221225535, 0, 4048, 3221225535, 0, 4048, 3221225535, 0, 4048, 3221225535, 0, 4048, 3221225535, 0, 4048, 3221225535, 0, 4048, 3221225535, 0, 4048, 3221225535, 0, 4064, 3221225535, 0, 4064, 3221225535, 0, 4064, 3221225535, 0, 4064, 3221225535, 0, 4064, 3221225535, 0, 4064, 3221225535, 0, 4064, 3221225535, 0, 4064, 3221225535, 0, 5264, 536870928, 0, 5264, 536870928, 0, 5280, 536870928, 0, 5280, 536870928, 0, 6912, 62915520, 0, 6912, 62915520, 0, 6912, 62915520, 0, 6912, 62915520, 0, 6912, 62915520, 0, 6912, 62915520, 0, 6912, 62915520, 0, 6912, 62915520, 0, 6928, 62915520, 0, 6928, 62915520, 0, 6928, 62915520, 0, 6928, 62915520, 0, 6928, 62915520, 0, 6928, 62915520, 0, 6928, 62915520, 0, 6928, 62915520, 0, 9088, 2097152, 0, 9728, 17, 0, 9728, 17, 0, 10688, 34, 0, 10688, 34, 0, 10704, 34, 0, 10704, 34, 0, 1088, 4227858447, 0, 1088, 4227858447, 0, 1088, 4227858447, 0, 1088, 4227858447, 0, 1088, 4227858447, 0, 1088, 4227858447, 0, 1088, 4227858447, 0, 1088, 4227858447, 0, 1088, 4227858447, 0, 1088, 4227858447, 0, 2896, 268435457, 0, 2896, 268435457, 0, 2912, 268435457, 0, 2912, 268435457, 0, 4048, 3221225535, 0, 4048, 3221225535, 0, 4048, 3221225535, 0, 4048, 3221225535, 0, 4048, 3221225535, 0, 4048, 3221225535, 0, 4048, 3221225535, 0, 4048, 3221225535, 0, 4064, 3221225535, 0, 4064, 3221225535, 0, 4064, 3221225535, 0, 4064, 3221225535, 0, 4064, 3221225535, 0, 4064, 3221225535, 0, 4064, 3221225535, 0, 4064, 3221225535, 0, 5264, 536870928, 0, 5264, 536870928, 0, 5280, 536870928, 0, 5280, 536870928, 0, 6912, 62915520, 0, 6912, 62915520, 0, 6912, 62915520, 0, 6912, 62915520, 0, 6912, 62915520, 0, 6912, 62915520, 0, 6912, 62915520, 0, 6912, 62915520, 0, 6928, 62915520, 0, 6928, 62915520, 0, 6928, 62915520, 0, 6928, 62915520, 0, 6928, 62915520, 0, 6928, 62915520, 0, 6928, 62915520, 0, 6928, 62915520, 0, 9088, 2097152, 0, 9728, 17, 0, 9728, 17, 0, 10688, 34, 0, 10688, 34, 0, 10704, 34, 0, 10704, 34, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756404673803458754_773_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756404673803458754_773_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..63c3f686 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756404673803458754_773_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,301 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((13 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 27))) { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter0 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 31))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 29))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 444 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [848, 16, 0, 864, 16, 0, 1808, 17, 0, 1808, 17, 0, 1824, 17, 0, 1824, 17, 0, 5524, 4, 0, 5528, 4, 0, 5532, 4, 0, 5540, 4, 0, 5544, 4, 0, 5548, 4, 0, 5968, 279620, 0, 5968, 279620, 0, 5968, 279620, 0, 5968, 279620, 0, 5968, 279620, 0, 5984, 279620, 0, 5984, 279620, 0, 5984, 279620, 0, 5984, 279620, 0, 5984, 279620, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 
1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6992, 1048576, 0, 7008, 1048576, 0, 7808, 85, 0, 7808, 85, 0, 7808, 85, 0, 7808, 85, 0, 8448, 8, 0, 9728, 2147483650, 0, 9728, 2147483650, 0, 10048, 545392672, 0, 10048, 545392672, 0, 10048, 545392672, 0, 10048, 545392672, 0, 10048, 545392672, 0, 15360, 1, 0, 16896, 2147483649, 0, 16896, 2147483649, 0, 17856, 8912898, 0, 17856, 8912898, 0, 17856, 8912898, 0, 848, 16, 0, 864, 16, 0, 1808, 17, 0, 1808, 17, 0, 1824, 17, 0, 1824, 17, 0, 5524, 4, 0, 5528, 4, 0, 5532, 4, 0, 5540, 4, 0, 5544, 4, 0, 5548, 4, 0, 5968, 279620, 0, 5968, 279620, 0, 5968, 279620, 0, 5968, 279620, 0, 5968, 279620, 0, 5984, 279620, 0, 5984, 279620, 0, 5984, 279620, 0, 5984, 279620, 0, 5984, 279620, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6544, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6560, 1431655765, 0, 6992, 1048576, 0, 7008, 1048576, 0, 7808, 85, 0, 7808, 85, 0, 7808, 85, 0, 7808, 85, 0, 8448, 8, 0, 9728, 
2147483650, 0, 9728, 2147483650, 0, 10048, 545392672, 0, 10048, 545392672, 0, 10048, 545392672, 0, 10048, 545392672, 0, 10048, 545392672, 0, 15360, 1, 0, 16896, 2147483649, 0, 16896, 2147483649, 0, 17856, 8912898, 0, 17856, 8912898, 0, 17856, 8912898, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756404739279799321_774_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756404739279799321_774_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9a014cc7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756404739279799321_774_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: 
+ Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756404739607274186_775_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756404739607274186_775_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c332a12b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756404739607274186_775_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,325 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 25))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 228 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 2112, 8, 0, 6976, 3758096399, 0, 6976, 3758096399, 0, 6976, 3758096399, 0, 6976, 3758096399, 0, 6976, 3758096399, 0, 6976, 3758096399, 0, 6976, 3758096399, 0, 7616, 85, 0, 7616, 85, 0, 7616, 85, 0, 7616, 85, 0, 10048, 1073741825, 0, 10048, 1073741825, 0, 10752, 1073741825, 0, 10752, 1073741825, 0, 11328, 272696336, 0, 11328, 272696336, 0, 11328, 272696336, 0, 11328, 272696336, 0, 11328, 272696336, 0, 11648, 68174084, 0, 11648, 68174084, 0, 11648, 68174084, 0, 11648, 68174084, 0, 11648, 68174084, 0, 13824, 33554432, 0, 15744, 2048, 0, 576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 
1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 2112, 8, 0, 6976, 3758096399, 0, 6976, 3758096399, 0, 6976, 3758096399, 0, 6976, 3758096399, 0, 6976, 3758096399, 0, 6976, 3758096399, 0, 6976, 3758096399, 0, 7616, 85, 0, 7616, 85, 0, 7616, 85, 0, 7616, 85, 0, 10048, 1073741825, 0, 10048, 1073741825, 0, 10752, 1073741825, 0, 10752, 1073741825, 0, 11328, 272696336, 0, 11328, 272696336, 0, 11328, 272696336, 0, 11328, 272696336, 0, 11328, 272696336, 0, 11648, 68174084, 0, 11648, 68174084, 0, 11648, 68174084, 0, 11648, 68174084, 0, 11648, 68174084, 0, 13824, 33554432, 0, 15744, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405200223210161_778_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405200223210161_778_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..425655fd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405200223210161_778_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,143 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 13)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2304, 8192, 0, 2752, 8192, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2304, 8192, 0, 2752, 8192, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405200533415899_779_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405200533415899_779_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..06e54260 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405200533415899_779_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,350 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 18)) || 
(WaveGetLaneIndex() == 24))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((237 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((285 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + 
WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 312 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 7808, 2290614272, 0, 7808, 2290614272, 0, 7808, 2290614272, 0, 7808, 2290614272, 0, 7824, 2290614272, 0, 7824, 2290614272, 0, 7824, 2290614272, 0, 7824, 2290614272, 0, 7840, 2290614272, 0, 7840, 2290614272, 0, 7840, 2290614272, 0, 7840, 2290614272, 0, 10944, 134217728, 0, 10960, 134217728, 0, 10976, 134217728, 0, 11968, 2290614272, 0, 11968, 2290614272, 0, 11968, 2290614272, 0, 11968, 2290614272, 0, 11984, 2290614272, 0, 11984, 2290614272, 0, 11984, 2290614272, 0, 11984, 2290614272, 0, 12000, 2290614272, 0, 12000, 2290614272, 0, 12000, 2290614272, 0, 12000, 2290614272, 0, 13184, 85, 0, 13184, 85, 0, 13184, 85, 0, 13184, 85, 0, 18240, 136314880, 0, 18240, 136314880, 0, 18256, 136314880, 0, 18256, 136314880, 0, 18272, 136314880, 0, 18272, 136314880, 0, 19520, 545392672, 0, 19520, 545392672, 0, 19520, 545392672, 0, 19520, 545392672, 0, 19520, 545392672, 0, 576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 7808, 2290614272, 0, 7808, 2290614272, 0, 7808, 2290614272, 0, 7808, 2290614272, 0, 7824, 2290614272, 0, 7824, 2290614272, 0, 7824, 2290614272, 0, 7824, 2290614272, 0, 7840, 
2290614272, 0, 7840, 2290614272, 0, 7840, 2290614272, 0, 7840, 2290614272, 0, 10944, 134217728, 0, 10960, 134217728, 0, 10976, 134217728, 0, 11968, 2290614272, 0, 11968, 2290614272, 0, 11968, 2290614272, 0, 11968, 2290614272, 0, 11984, 2290614272, 0, 11984, 2290614272, 0, 11984, 2290614272, 0, 11984, 2290614272, 0, 12000, 2290614272, 0, 12000, 2290614272, 0, 12000, 2290614272, 0, 12000, 2290614272, 0, 13184, 85, 0, 13184, 85, 0, 13184, 85, 0, 13184, 85, 0, 18240, 136314880, 0, 18240, 136314880, 0, 18256, 136314880, 0, 18256, 136314880, 0, 18272, 136314880, 0, 18272, 136314880, 0, 19520, 545392672, 0, 19520, 545392672, 0, 19520, 545392672, 0, 19520, 545392672, 0, 19520, 545392672, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405236249269061_781_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405236249269061_781_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dc7d83a7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405236249269061_781_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,213 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 21))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5696, 1426063360, 0, 5696, 1426063360, 0, 5696, 1426063360, 0, 5696, 1426063360, 0, 7184, 1, 0, 7200, 1, 0, 7216, 1, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5696, 1426063360, 0, 5696, 1426063360, 0, 5696, 1426063360, 0, 5696, 1426063360, 0, 7184, 1, 0, 7200, 1, 0, 7216, 1, 0] + - Name: _wave_op_index + Format: 
UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405237606312027_782_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405237606312027_782_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ff3e4ded --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405237606312027_782_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,167 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 4))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 22))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 7632, 612368420, 0, 7632, 612368420, 0, 7632, 612368420, 0, 7632, 612368420, 0, 7632, 612368420, 0, 7648, 612368420, 0, 7648, 612368420, 0, 7648, 612368420, 0, 7648, 612368420, 0, 7648, 612368420, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 7632, 612368420, 0, 7632, 612368420, 0, 7632, 612368420, 0, 7632, 612368420, 0, 7632, 612368420, 0, 7648, 612368420, 0, 7648, 612368420, 0, 7648, 612368420, 0, 7648, 612368420, 0, 7648, 612368420, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405238318603080_783_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405238318603080_783_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4b5a8256 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405238318603080_783_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,117 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405238658153607_784_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405238658153607_784_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c2caf20d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405238658153607_784_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,267 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 30))) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
case 3: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((231 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: 
UInt32 + Stride: 4 + Fill: 0 + Size: 480 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 1409286145, 0, 1344, 1409286145, 0, 1344, 1409286145, 0, 1344, 1409286145, 0, 1360, 1409286145, 0, 1360, 1409286145, 0, 1360, 1409286145, 0, 1360, 1409286145, 0, 1376, 1409286145, 0, 1376, 1409286145, 0, 1376, 1409286145, 0, 1376, 1409286145, 0, 2304, 1073741824, 0, 2308, 1073741824, 0, 2312, 1073741824, 0, 2320, 1073741824, 0, 2324, 1073741824, 0, 2328, 1073741824, 0, 2336, 1073741824, 0, 2340, 1073741824, 0, 2344, 1073741824, 0, 3216, 2863267840, 0, 3216, 2863267840, 0, 3216, 2863267840, 0, 3216, 2863267840, 0, 3216, 2863267840, 0, 3216, 2863267840, 0, 3216, 2863267840, 0, 3216, 2863267840, 0, 3232, 2863267840, 0, 3232, 2863267840, 0, 3232, 2863267840, 0, 3232, 2863267840, 0, 3232, 2863267840, 0, 3232, 2863267840, 0, 3232, 2863267840, 0, 3232, 2863267840, 0, 6160, 536903680, 0, 6160, 536903680, 0, 6176, 536903680, 0, 6176, 536903680, 0, 7120, 8388608, 0, 7136, 8388608, 0, 7696, 2852126720, 0, 7696, 2852126720, 0, 7696, 2852126720, 0, 7696, 2852126720, 0, 7712, 2852126720, 0, 7712, 2852126720, 0, 7712, 2852126720, 0, 7712, 2852126720, 0, 9600, 1073741824, 0, 9616, 1073741824, 0, 9632, 1073741824, 0, 11392, 17, 0, 11392, 17, 0, 12288, 1145324612, 0, 12288, 1145324612, 0, 12288, 1145324612, 0, 12288, 1145324612, 0, 12288, 1145324612, 0, 12288, 1145324612, 0, 12288, 1145324612, 0, 12288, 1145324612, 0, 14224, 2290649224, 0, 14224, 2290649224, 0, 14224, 2290649224, 0, 14224, 2290649224, 0, 14224, 2290649224, 0, 14224, 2290649224, 0, 14224, 2290649224, 0, 14224, 2290649224, 0, 14240, 2290649224, 0, 14240, 2290649224, 0, 14240, 2290649224, 0, 14240, 2290649224, 0, 14240, 2290649224, 0, 14240, 2290649224, 0, 14240, 2290649224, 0, 14240, 2290649224, 0, 1344, 1409286145, 0, 1344, 1409286145, 0, 1344, 1409286145, 0, 1344, 1409286145, 0, 1360, 1409286145, 0, 1360, 1409286145, 0, 1360, 1409286145, 0, 1360, 1409286145, 0, 1376, 1409286145, 0, 1376, 1409286145, 
0, 1376, 1409286145, 0, 1376, 1409286145, 0, 2304, 1073741824, 0, 2308, 1073741824, 0, 2312, 1073741824, 0, 2320, 1073741824, 0, 2324, 1073741824, 0, 2328, 1073741824, 0, 2336, 1073741824, 0, 2340, 1073741824, 0, 2344, 1073741824, 0, 3216, 2863267840, 0, 3216, 2863267840, 0, 3216, 2863267840, 0, 3216, 2863267840, 0, 3216, 2863267840, 0, 3216, 2863267840, 0, 3216, 2863267840, 0, 3216, 2863267840, 0, 3232, 2863267840, 0, 3232, 2863267840, 0, 3232, 2863267840, 0, 3232, 2863267840, 0, 3232, 2863267840, 0, 3232, 2863267840, 0, 3232, 2863267840, 0, 3232, 2863267840, 0, 6160, 536903680, 0, 6160, 536903680, 0, 6176, 536903680, 0, 6176, 536903680, 0, 7120, 8388608, 0, 7136, 8388608, 0, 7696, 2852126720, 0, 7696, 2852126720, 0, 7696, 2852126720, 0, 7696, 2852126720, 0, 7712, 2852126720, 0, 7712, 2852126720, 0, 7712, 2852126720, 0, 7712, 2852126720, 0, 9600, 1073741824, 0, 9616, 1073741824, 0, 9632, 1073741824, 0, 11392, 17, 0, 11392, 17, 0, 12288, 1145324612, 0, 12288, 1145324612, 0, 12288, 1145324612, 0, 12288, 1145324612, 0, 12288, 1145324612, 0, 12288, 1145324612, 0, 12288, 1145324612, 0, 12288, 1145324612, 0, 14224, 2290649224, 0, 14224, 2290649224, 0, 14224, 2290649224, 0, 14224, 2290649224, 0, 14224, 2290649224, 0, 14224, 2290649224, 0, 14224, 2290649224, 0, 14224, 2290649224, 0, 14240, 2290649224, 0, 14240, 2290649224, 0, 14240, 2290649224, 0, 14240, 2290649224, 0, 14240, 2290649224, 0, 14240, 2290649224, 0, 14240, 2290649224, 0, 14240, 2290649224, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405324276897834_785_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405324276897834_785_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..23fff983 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405324276897834_785_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,229 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + 
result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((135 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single 
dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 270 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 73, 0, 1792, 73, 0, 1792, 73, 0, 4672, 3623878747, 0, 4672, 3623878747, 0, 4672, 3623878747, 0, 4672, 3623878747, 0, 4672, 3623878747, 0, 4672, 3623878747, 0, 4672, 3623878747, 0, 4672, 3623878747, 0, 4672, 3623878747, 0, 5312, 73, 0, 5312, 73, 0, 5312, 73, 0, 5888, 272696336, 0, 5888, 272696336, 0, 5888, 272696336, 0, 5888, 272696336, 0, 5888, 272696336, 0, 7504, 603979776, 0, 7504, 603979776, 0, 7520, 603979776, 0, 7520, 603979776, 0, 7536, 603979776, 0, 7536, 603979776, 0, 8660, 536871204, 0, 8660, 536871204, 0, 8660, 536871204, 0, 8660, 536871204, 0, 8676, 536871204, 0, 8676, 536871204, 0, 8676, 536871204, 0, 8676, 536871204, 0, 8692, 536871204, 0, 8692, 536871204, 0, 8692, 536871204, 0, 8692, 536871204, 0, 9296, 536870912, 0, 9312, 536870912, 0, 9328, 536870912, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 73, 0, 1792, 73, 0, 1792, 73, 0, 4672, 3623878747, 0, 4672, 3623878747, 0, 4672, 3623878747, 0, 4672, 3623878747, 0, 4672, 3623878747, 0, 4672, 3623878747, 0, 4672, 3623878747, 0, 4672, 3623878747, 0, 4672, 3623878747, 0, 5312, 73, 0, 5312, 73, 0, 5312, 73, 0, 5888, 272696336, 0, 5888, 272696336, 0, 5888, 272696336, 0, 5888, 272696336, 0, 5888, 272696336, 0, 7504, 603979776, 0, 7504, 603979776, 0, 7520, 603979776, 0, 7520, 603979776, 0, 7536, 603979776, 0, 7536, 603979776, 0, 8660, 536871204, 0, 8660, 536871204, 0, 8660, 536871204, 0, 8660, 536871204, 0, 8676, 536871204, 0, 8676, 536871204, 0, 8676, 536871204, 0, 8676, 536871204, 0, 8692, 536871204, 0, 8692, 536871204, 0, 8692, 536871204, 0, 8692, 536871204, 0, 9296, 536870912, 0, 9312, 536870912, 0, 9328, 536870912, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + 
GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405330298100701_786_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405330298100701_786_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..330142eb --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405330298100701_786_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,255 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 12)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23))) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((172 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 2)) { + break; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 408 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 16777224, 0, 1856, 16777224, 0, 1872, 16777224, 0, 1872, 16777224, 0, 2944, 65, 0, 2944, 65, 0, 2960, 65, 0, 2960, 65, 0, 3392, 585, 0, 3392, 585, 0, 3392, 585, 0, 3392, 585, 0, 3408, 585, 0, 3408, 585, 0, 3408, 585, 0, 3408, 585, 0, 3968, 272696336, 0, 3968, 272696336, 0, 3968, 272696336, 0, 3968, 272696336, 0, 3968, 272696336, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 
3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4928, 73, 0, 4928, 73, 0, 4928, 73, 0, 6144, 272696336, 0, 6144, 272696336, 0, 6144, 272696336, 0, 6144, 272696336, 0, 6144, 272696336, 0, 11024, 8388608, 0, 11028, 8388608, 0, 11032, 8388608, 0, 11040, 8388608, 0, 11044, 8388608, 0, 11048, 8388608, 0, 11056, 8388608, 0, 11060, 8388608, 0, 11064, 8388608, 0, 12624, 8388608, 0, 12640, 8388608, 0, 12656, 8388608, 0, 13840, 67110912, 0, 13840, 67110912, 0, 13856, 67110912, 0, 13856, 67110912, 0, 13872, 67110912, 0, 13872, 67110912, 0, 1856, 16777224, 0, 1856, 16777224, 0, 1872, 16777224, 0, 1872, 16777224, 0, 2944, 65, 0, 2944, 65, 0, 2960, 65, 0, 2960, 65, 0, 3392, 585, 0, 3392, 585, 0, 3392, 585, 0, 3392, 585, 0, 3408, 585, 0, 3408, 585, 0, 3408, 585, 0, 3408, 585, 0, 3968, 272696336, 0, 3968, 272696336, 0, 3968, 272696336, 0, 3968, 272696336, 0, 3968, 272696336, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4288, 3067833782, 0, 4928, 73, 0, 4928, 73, 0, 4928, 73, 0, 6144, 272696336, 0, 6144, 272696336, 0, 6144, 272696336, 0, 6144, 272696336, 0, 6144, 272696336, 0, 11024, 8388608, 0, 11028, 8388608, 0, 11032, 8388608, 0, 11040, 8388608, 0, 11044, 8388608, 0, 11048, 8388608, 0, 11056, 8388608, 0, 11060, 8388608, 0, 11064, 8388608, 0, 12624, 8388608, 0, 12640, 8388608, 0, 12656, 8388608, 0, 13840, 67110912, 0, 13840, 67110912, 0, 13856, 67110912, 0, 13856, 67110912, 0, 13872, 67110912, 0, 13872, 67110912, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405334152580966_787_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405334152580966_787_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..00e1ea76 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405334152580966_787_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,68 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: 
[1408, 33685536, 0, 1408, 33685536, 0, 1408, 33685536, 0, 1424, 33685536, 0, 1424, 33685536, 0, 1424, 33685536, 0, 1440, 33685536, 0, 1440, 33685536, 0, 1440, 33685536, 0, 1408, 33685536, 0, 1408, 33685536, 0, 1408, 33685536, 0, 1424, 33685536, 0, 1424, 33685536, 0, 1424, 33685536, 0, 1440, 33685536, 0, 1440, 33685536, 0, 1440, 33685536, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405437459645445_789_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405437459645445_789_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..80da9450 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405437459645445_789_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + 
if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 1472, 2181570690, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 
3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0, 1792, 3067833782, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405499513606607_791_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405499513606607_791_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..555552f0 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405499513606607_791_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,279 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] /* Every wave-op site below appends one 3-word record to _participant_bit: word 0 is the site id shifted left by 6, OR-ed with the enclosing loop counters (shifted by 4 and 2) when the site is inside a loop; words 1 and 2 are ballot.x and ballot.y of WaveActiveBallot(1). Each lane reserves its own 3-word slot through InterlockedAdd on _wave_op_index[0]. NOTE(review): the control flow, including case fall-through and repeated conditions, looks machine-generated and is presumably what the test exercises; keep it token-for-token. */ +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } 
+ } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 24))) { 
+ result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((189 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((199 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((208 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((213 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((220 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((224 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((237 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((247 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 840 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 1, 0, 8512, 10485760, 0, 8512, 10485760, 0, 9984, 1074872324, 0, 9984, 1074872324, 0, 9984, 1074872324, 0, 9984, 1074872324, 0, 9984, 1074872324, 0, 10896, 64, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12752, 17, 0, 12752, 
17, 0, 12756, 17, 0, 12756, 17, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 
4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15824, 4096, 0, 1344, 1, 0, 8512, 10485760, 0, 8512, 10485760, 0, 9984, 1074872324, 0, 9984, 1074872324, 0, 9984, 1074872324, 0, 9984, 1074872324, 0, 9984, 1074872324, 0, 10896, 64, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12112, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12116, 4278190143, 0, 12752, 17, 0, 12752, 17, 0, 12756, 17, 0, 12756, 17, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13648, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 13652, 1717986918, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14096, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 
0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 14100, 978670, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15184, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15188, 4292870271, 0, 15824, 4096, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405602114364973_795_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405602114364973_795_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..839032cb --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405602114364973_795_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,192 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] /* Every wave-op site below appends one 3-word record to _participant_bit: word 0 is the site id shifted left by 6; words 1 and 2 are ballot.x and ballot.y of WaveActiveBallot(1). Each lane reserves its own 3-word slot through InterlockedAdd on _wave_op_index[0]. NOTE(review): the control flow, including the nested switch and case fall-through, looks machine-generated and is presumably what the test exercises; keep it token-for-token. */ +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 16))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 
8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3584, 65536, 0, 4224, 73, 0, 4224, 73, 0, 4224, 73, 0, 4864, 73, 0, 4864, 73, 0, 4864, 73, 0, 6016, 2147483650, 0, 6016, 2147483650, 0, 6720, 2147483650, 0, 6720, 2147483650, 0, 7360, 613566756, 0, 7360, 613566756, 0, 7360, 613566756, 0, 7360, 613566756, 0, 7360, 613566756, 0, 7360, 613566756, 0, 7360, 613566756, 0, 7360, 613566756, 0, 7360, 613566756, 0, 7360, 613566756, 0, 8000, 73, 0, 8000, 73, 0, 8000, 73, 0, 8576, 272696336, 0, 8576, 272696336, 0, 8576, 272696336, 0, 8576, 272696336, 0, 8576, 272696336, 0, 8896, 613566756, 0, 8896, 613566756, 0, 8896, 613566756, 0, 8896, 613566756, 0, 8896, 613566756, 0, 8896, 613566756, 0, 8896, 613566756, 0, 8896, 613566756, 0, 8896, 613566756, 0, 8896, 613566756, 0, 3584, 65536, 0, 4224, 73, 0, 4224, 73, 0, 4224, 73, 0, 
4864, 73, 0, 4864, 73, 0, 4864, 73, 0, 6016, 2147483650, 0, 6016, 2147483650, 0, 6720, 2147483650, 0, 6720, 2147483650, 0, 7360, 613566756, 0, 7360, 613566756, 0, 7360, 613566756, 0, 7360, 613566756, 0, 7360, 613566756, 0, 7360, 613566756, 0, 7360, 613566756, 0, 7360, 613566756, 0, 7360, 613566756, 0, 7360, 613566756, 0, 8000, 73, 0, 8000, 73, 0, 8000, 73, 0, 8576, 272696336, 0, 8576, 272696336, 0, 8576, 272696336, 0, 8576, 272696336, 0, 8576, 272696336, 0, 8896, 613566756, 0, 8896, 613566756, 0, 8896, 613566756, 0, 8896, 613566756, 0, 8896, 613566756, 0, 8896, 613566756, 0, 8896, 613566756, 0, 8896, 613566756, 0, 8896, 613566756, 0, 8896, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405699446449316_798_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405699446449316_798_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f85f81bd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405699446449316_798_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,163 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] /* Every wave-op site below appends one 3-word record to _participant_bit: word 0 is the site id shifted left by 6, OR-ed with counter0 shifted left by 4 for sites inside the while loop; words 1 and 2 are ballot.x and ballot.y of WaveActiveBallot(1). Each lane reserves its own 3-word slot through InterlockedAdd on _wave_op_index[0]. NOTE(review): the control flow, including case fall-through, looks machine-generated and is presumably what the test exercises; keep it token-for-token. */ +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 486 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2064, 1711276032, 0, 2064, 1711276032, 0, 2064, 1711276032, 0, 2064, 1711276032, 0, 2080, 1711276032, 0, 2080, 1711276032, 0, 2080, 1711276032, 0, 2080, 1711276032, 0, 2960, 572662306, 0, 2960, 572662306, 0, 2960, 572662306, 0, 2960, 572662306, 0, 2960, 572662306, 0, 2960, 572662306, 0, 2960, 572662306, 0, 2960, 572662306, 0, 2976, 572662306, 0, 2976, 572662306, 0, 2976, 572662306, 0, 2976, 572662306, 0, 2976, 572662306, 0, 2976, 572662306, 0, 2976, 572662306, 0, 2976, 572662306, 0, 3536, 572662306, 0, 3536, 572662306, 0, 3536, 572662306, 0, 3536, 572662306, 0, 3536, 572662306, 0, 3536, 572662306, 0, 3536, 572662306, 0, 3536, 572662306, 0, 3552, 572662306, 0, 3552, 572662306, 0, 3552, 572662306, 0, 3552, 572662306, 0, 3552, 572662306, 0, 3552, 572662306, 0, 3552, 572662306, 0, 3552, 572662306, 0, 3984, 6, 0, 3984, 6, 0, 4000, 6, 0, 4000, 6, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 5056, 85, 0, 5056, 85, 0, 5056, 85, 0, 5056, 85, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 576, 17, 0, 576, 17, 0, 2064, 1711276032, 0, 2064, 1711276032, 0, 2064, 1711276032, 0, 2064, 1711276032, 0, 2080, 1711276032, 0, 2080, 1711276032, 0, 2080, 1711276032, 0, 2080, 1711276032, 0, 2960, 
572662306, 0, 2960, 572662306, 0, 2960, 572662306, 0, 2960, 572662306, 0, 2960, 572662306, 0, 2960, 572662306, 0, 2960, 572662306, 0, 2960, 572662306, 0, 2976, 572662306, 0, 2976, 572662306, 0, 2976, 572662306, 0, 2976, 572662306, 0, 2976, 572662306, 0, 2976, 572662306, 0, 2976, 572662306, 0, 2976, 572662306, 0, 3536, 572662306, 0, 3536, 572662306, 0, 3536, 572662306, 0, 3536, 572662306, 0, 3536, 572662306, 0, 3536, 572662306, 0, 3536, 572662306, 0, 3536, 572662306, 0, 3552, 572662306, 0, 3552, 572662306, 0, 3552, 572662306, 0, 3552, 572662306, 0, 3552, 572662306, 0, 3552, 572662306, 0, 3552, 572662306, 0, 3552, 572662306, 0, 3984, 6, 0, 3984, 6, 0, 4000, 6, 0, 4000, 6, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 4416, 978670, 0, 5056, 85, 0, 5056, 85, 0, 5056, 85, 0, 5056, 85, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0, 5632, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405711666175993_800_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405711666175993_800_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..25027edf --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405711666175993_800_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,299 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 31))) { + result = (result + 
WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((102 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 2)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 23))) { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 12))) { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() 
== 8))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((278 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((297 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 5))) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() 
+ 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (333 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((359 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (374 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 288 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 2880, 2147483649, 0, 2880, 2147483649, 0, 2896, 2147483649, 0, 2896, 2147483649, 0, 6528, 4160, 0, 6528, 4160, 0, 6532, 4160, 0, 6532, 4160, 0, 6536, 4160, 0, 6536, 4160, 0, 6544, 4160, 0, 6544, 4160, 0, 6548, 4160, 0, 6548, 4160, 0, 6552, 4160, 0, 6552, 4160, 0, 7488, 272696336, 0, 7488, 272696336, 0, 7488, 272696336, 0, 7488, 272696336, 0, 7488, 
272696336, 0, 7808, 613566756, 0, 7808, 613566756, 0, 7808, 613566756, 0, 7808, 613566756, 0, 7808, 613566756, 0, 7808, 613566756, 0, 7808, 613566756, 0, 7808, 613566756, 0, 7808, 613566756, 0, 7808, 613566756, 0, 16128, 2147483648, 0, 17808, 524288, 0, 17824, 524288, 0, 19024, 65536, 0, 19040, 65536, 0, 768, 65, 0, 768, 65, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 2880, 2147483649, 0, 2880, 2147483649, 0, 2896, 2147483649, 0, 2896, 2147483649, 0, 6528, 4160, 0, 6528, 4160, 0, 6532, 4160, 0, 6532, 4160, 0, 6536, 4160, 0, 6536, 4160, 0, 6544, 4160, 0, 6544, 4160, 0, 6548, 4160, 0, 6548, 4160, 0, 6552, 4160, 0, 6552, 4160, 0, 7488, 272696336, 0, 7488, 272696336, 0, 7488, 272696336, 0, 7488, 272696336, 0, 7488, 272696336, 0, 7808, 613566756, 0, 7808, 613566756, 0, 7808, 613566756, 0, 7808, 613566756, 0, 7808, 613566756, 0, 7808, 613566756, 0, 7808, 613566756, 0, 7808, 613566756, 0, 7808, 613566756, 0, 7808, 613566756, 0, 16128, 2147483648, 0, 17808, 524288, 0, 17824, 524288, 0, 19024, 65536, 0, 19040, 65536, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405760897902661_801_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405760897902661_801_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d388388d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405760897902661_801_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,324 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 27)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 29))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 25))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 31))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((237 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (266 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 25))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23))) { + result = (result + 
WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (301 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (334 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (339 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (346 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - 
Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1616, 1342177280, 0, 1616, 1342177280, 0, 5520, 536870912, 0, 5524, 536870912, 0, 8064, 16, 0, 9408, 585, 0, 9408, 585, 0, 9408, 585, 0, 9408, 585, 0, 10432, 65, 0, 10432, 65, 0, 11136, 18874368, 0, 11136, 18874368, 0, 12736, 2097152, 0, 12752, 2097152, 0, 12768, 2097152, 0, 14272, 2, 0, 15808, 2, 0, 21696, 76695844, 0, 21696, 76695844, 0, 21696, 76695844, 0, 21696, 76695844, 0, 21696, 76695844, 0, 21696, 76695844, 0, 21696, 76695844, 0, 21696, 76695844, 0, 21696, 76695844, 0, 1616, 1342177280, 0, 1616, 1342177280, 0, 5520, 536870912, 0, 5524, 536870912, 0, 8064, 16, 0, 9408, 585, 0, 9408, 585, 0, 9408, 585, 0, 9408, 585, 0, 10432, 65, 0, 10432, 65, 0, 11136, 18874368, 0, 11136, 18874368, 0, 12736, 2097152, 0, 12752, 2097152, 0, 12768, 2097152, 0, 14272, 2, 0, 15808, 2, 0, 21696, 76695844, 0, 21696, 76695844, 0, 21696, 76695844, 0, 21696, 76695844, 0, 21696, 76695844, 0, 21696, 76695844, 0, 21696, 76695844, 0, 21696, 76695844, 0, 21696, 76695844, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405765328526430_802_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405765328526430_802_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8a46ff1d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405765328526430_802_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,156 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((76 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((167 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 2048, 0, 3392, 1, 0, 4864, 1, 0, 4868, 1, 0, 4872, 1, 0, 4880, 1, 0, 4884, 1, 0, 4888, 1, 0, 7616, 85, 0, 7616, 85, 0, 7616, 85, 0, 7616, 85, 0, 9552, 2132000, 0, 9552, 2132000, 0, 9552, 2132000, 0, 9552, 2132000, 0, 9568, 2132000, 0, 9568, 2132000, 0, 9568, 2132000, 0, 9568, 2132000, 0, 10708, 2147483648, 0, 10712, 2147483648, 0, 10716, 2147483648, 0, 10724, 2147483648, 0, 10728, 2147483648, 0, 10732, 2147483648, 0, 2112, 2048, 0, 3392, 1, 0, 4864, 1, 0, 4868, 1, 0, 4872, 1, 0, 4880, 1, 0, 4884, 1, 0, 4888, 1, 0, 7616, 85, 0, 7616, 85, 0, 7616, 85, 0, 7616, 85, 0, 9552, 2132000, 0, 9552, 2132000, 0, 9552, 2132000, 0, 9552, 2132000, 0, 9568, 2132000, 0, 9568, 2132000, 0, 9568, 2132000, 0, 9568, 2132000, 0, 10708, 2147483648, 0, 10712, 2147483648, 0, 10716, 2147483648, 0, 10724, 2147483648, 0, 10728, 2147483648, 0, 10732, 2147483648, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405792950225779_804_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405792950225779_804_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..43404ab1 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405792950225779_804_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,286 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 27))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (((68 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((89 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 25))) { + switch 
((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((261 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 252 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1472, 1073741833, 0, 1472, 1073741833, 0, 1472, 1073741833, 0, 1488, 1073741833, 0, 1488, 1073741833, 0, 1488, 1073741833, 0, 6848, 1090519049, 0, 6848, 1090519049, 0, 6848, 1090519049, 0, 6848, 1090519049, 0, 6864, 1090519049, 0, 6864, 1090519049, 0, 6864, 1090519049, 0, 6864, 1090519049, 0, 8064, 512, 0, 8080, 512, 0, 8768, 1224737353, 0, 8768, 1224737353, 0, 8768, 1224737353, 0, 8768, 1224737353, 0, 8768, 1224737353, 0, 8768, 1224737353, 0, 8768, 1224737353, 0, 8784, 1224737353, 0, 8784, 1224737353, 0, 8784, 1224737353, 0, 8784, 1224737353, 0, 8784, 1224737353, 0, 8784, 1224737353, 0, 8784, 1224737353, 0, 9856, 16, 0, 18624, 65536, 0, 18944, 613566756, 0, 18944, 613566756, 0, 18944, 613566756, 0, 18944, 613566756, 0, 18944, 613566756, 0, 18944, 613566756, 0, 18944, 613566756, 0, 18944, 613566756, 0, 18944, 613566756, 0, 18944, 613566756, 0, 
1472, 1073741833, 0, 1472, 1073741833, 0, 1472, 1073741833, 0, 1488, 1073741833, 0, 1488, 1073741833, 0, 1488, 1073741833, 0, 6848, 1090519049, 0, 6848, 1090519049, 0, 6848, 1090519049, 0, 6848, 1090519049, 0, 6864, 1090519049, 0, 6864, 1090519049, 0, 6864, 1090519049, 0, 6864, 1090519049, 0, 8064, 512, 0, 8080, 512, 0, 8768, 1224737353, 0, 8768, 1224737353, 0, 8768, 1224737353, 0, 8768, 1224737353, 0, 8768, 1224737353, 0, 8768, 1224737353, 0, 8768, 1224737353, 0, 8784, 1224737353, 0, 8784, 1224737353, 0, 8784, 1224737353, 0, 8784, 1224737353, 0, 8784, 1224737353, 0, 8784, 1224737353, 0, 8784, 1224737353, 0, 9856, 16, 0, 18624, 65536, 0, 18944, 613566756, 0, 18944, 613566756, 0, 18944, 613566756, 0, 18944, 613566756, 0, 18944, 613566756, 0, 18944, 613566756, 0, 18944, 613566756, 0, 18944, 613566756, 0, 18944, 613566756, 0, 18944, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405825046768823_805_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405825046768823_805_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..738ef03c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405825046768823_805_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,122 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 768, 65, 0, 768, 65, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1344, 272696336, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0, 1664, 68174084, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405894682162163_807_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405894682162163_807_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3589d3d7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405894682162163_807_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,92 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0, 1152, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405894947760185_808_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405894947760185_808_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..098fe2c2 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405894947760185_808_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,350 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 18)) { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveSum(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((73 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if 
((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((WaveGetLaneIndex() >= 31)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 17))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 
2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((209 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 25))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((319 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((342 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (356 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (360 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 426 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 2368, 68174084, 0, 2368, 68174084, 0, 2368, 68174084, 0, 2368, 68174084, 0, 2368, 68174084, 0, 2384, 68174084, 0, 2384, 68174084, 0, 2384, 68174084, 0, 2384, 68174084, 0, 2384, 68174084, 0, 4096, 68157440, 0, 4096, 68157440, 0, 4100, 68157440, 0, 4100, 68157440, 0, 4104, 68157440, 0, 4104, 68157440, 0, 4112, 68157440, 0, 4112, 68157440, 0, 4116, 68157440, 0, 4116, 68157440, 0, 4120, 68157440, 0, 4120, 68157440, 0, 4672, 545259520, 0, 4672, 545259520, 0, 4676, 545259520, 0, 4676, 545259520, 0, 4680, 545259520, 0, 4680, 545259520, 0, 4688, 545259520, 0, 4688, 545259520, 0, 4692, 545259520, 0, 4692, 545259520, 0, 4696, 545259520, 0, 4696, 545259520, 0, 5248, 536870912, 0, 5264, 536870912, 0, 5952, 68174084, 0, 5952, 68174084, 0, 5952, 68174084, 0, 5952, 68174084, 0, 5952, 
68174084, 0, 5968, 68174084, 0, 5968, 68174084, 0, 5968, 68174084, 0, 5968, 68174084, 0, 5968, 68174084, 0, 8896, 67109120, 0, 8896, 67109120, 0, 8512, 16777216, 0, 8256, 4194304000, 0, 8256, 4194304000, 0, 8256, 4194304000, 0, 8256, 4194304000, 0, 8256, 4194304000, 0, 8256, 4194304000, 0, 16256, 73, 0, 16256, 73, 0, 16256, 73, 0, 16832, 272696336, 0, 16832, 272696336, 0, 16832, 272696336, 0, 16832, 272696336, 0, 16832, 272696336, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 2368, 68174084, 0, 2368, 68174084, 0, 2368, 68174084, 0, 2368, 68174084, 0, 2368, 68174084, 0, 2384, 68174084, 0, 2384, 68174084, 0, 2384, 68174084, 0, 2384, 68174084, 0, 2384, 68174084, 0, 4096, 68157440, 0, 4096, 68157440, 0, 4100, 68157440, 0, 4100, 68157440, 0, 4104, 68157440, 0, 4104, 68157440, 0, 4112, 68157440, 0, 4112, 68157440, 0, 4116, 68157440, 0, 4116, 68157440, 0, 4120, 68157440, 0, 4120, 68157440, 0, 4672, 545259520, 0, 4672, 545259520, 0, 4676, 545259520, 0, 4676, 545259520, 0, 4680, 545259520, 0, 4680, 545259520, 0, 4688, 545259520, 0, 4688, 545259520, 0, 4692, 545259520, 0, 4692, 545259520, 0, 4696, 545259520, 0, 4696, 545259520, 0, 5248, 536870912, 0, 5264, 536870912, 0, 5952, 68174084, 0, 5952, 68174084, 0, 5952, 68174084, 0, 5952, 68174084, 0, 5952, 68174084, 0, 5968, 68174084, 0, 5968, 68174084, 0, 5968, 68174084, 0, 5968, 68174084, 0, 5968, 68174084, 0, 8896, 67109120, 0, 8896, 67109120, 0, 8512, 16777216, 0, 8256, 4194304000, 0, 8256, 4194304000, 0, 8256, 4194304000, 0, 8256, 4194304000, 0, 8256, 4194304000, 0, 8256, 4194304000, 0, 16256, 73, 0, 16256, 73, 0, 16256, 73, 0, 16832, 272696336, 0, 16832, 272696336, 0, 16832, 272696336, 0, 16832, 272696336, 0, 16832, 272696336, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405948325414603_811_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405948325414603_811_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..456f7c37 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405948325414603_811_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,129 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756405948555317200_812_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756405948555317200_812_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..229dd593 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756405948555317200_812_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,224 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while 
((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((75 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 
< 3)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((128 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((140 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((155 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((170 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 450 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 268435457, 0, 1280, 268435457, 0, 3280, 1114112, 0, 3280, 1114112, 0, 4240, 1048576, 0, 4244, 1048576, 0, 4248, 1048576, 0, 6544, 819, 0, 6544, 819, 0, 6544, 819, 0, 6544, 819, 0, 6544, 819, 0, 6544, 819, 0, 6560, 819, 0, 6560, 819, 0, 6560, 819, 0, 6560, 819, 0, 6560, 819, 0, 6560, 819, 0, 8212, 536875008, 0, 8212, 536875008, 0, 8216, 536875008, 0, 8216, 536875008, 0, 8228, 536875008, 0, 8228, 536875008, 0, 8232, 536875008, 0, 8232, 536875008, 0, 10900, 1048576, 0, 10904, 1048576, 0, 10916, 1048576, 0, 10920, 1048576, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 
2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 1280, 268435457, 0, 1280, 268435457, 0, 3280, 1114112, 0, 3280, 1114112, 0, 4240, 1048576, 0, 4244, 1048576, 0, 4248, 1048576, 0, 6544, 819, 0, 6544, 819, 0, 6544, 819, 0, 6544, 819, 0, 6544, 819, 0, 6544, 819, 0, 6560, 819, 0, 6560, 819, 0, 6560, 819, 0, 6560, 819, 0, 6560, 819, 0, 6560, 819, 0, 8212, 536875008, 0, 8212, 536875008, 0, 8216, 536875008, 0, 8216, 536875008, 0, 8228, 536875008, 0, 8228, 536875008, 0, 8232, 536875008, 0, 8232, 536875008, 0, 10900, 1048576, 0, 10904, 1048576, 0, 10916, 1048576, 0, 10920, 1048576, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11392, 2004318071, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0, 11840, 1048575, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756406085867801110_814_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756406085867801110_814_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2f185818 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756406085867801110_814_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,71 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Records wave-op participation: a lane that reaches the op reserves three consecutive uints through InterlockedAdd on _wave_op_index[0], then stores a tag word followed by ballot.x and ballot.y of WaveActiveBallot(1). The tag here is (18 << 6) | (i0 << 4). */ + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { /* i0 == 1 is already the final pass of the i0 < 2 loop, so this break skips nothing */ + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 4227858435, 0, 1152, 4227858435, 0, 1152, 4227858435, 0, 1152, 4227858435, 0, 
1152, 4227858435, 0, 1152, 4227858435, 0, 1152, 4227858435, 0, 1152, 4227858435, 0, 1168, 4227858435, 0, 1168, 4227858435, 0, 1168, 4227858435, 0, 1168, 4227858435, 0, 1168, 4227858435, 0, 1168, 4227858435, 0, 1168, 4227858435, 0, 1168, 4227858435, 0, 1152, 4227858435, 0, 1152, 4227858435, 0, 1152, 4227858435, 0, 1152, 4227858435, 0, 1152, 4227858435, 0, 1152, 4227858435, 0, 1152, 4227858435, 0, 1152, 4227858435, 0, 1168, 4227858435, 0, 1168, 4227858435, 0, 1168, 4227858435, 0, 1168, 4227858435, 0, 1168, 4227858435, 0, 1168, 4227858435, 0, 1168, 4227858435, 0, 1168, 4227858435, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756406086462078823_815_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756406086462078823_815_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d0b946d0 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756406086462078823_815_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,128 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Records wave-op participation: each guarded block reserves three consecutive uints through InterlockedAdd on _wave_op_index[0], then stores a tag word (op id << 6) followed by ballot.x and ballot.y of WaveActiveBallot(1). */ + uint result = 0; + if ((WaveGetLaneIndex() == 11)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 31))) { /* lane 11 satisfies neither (< 4) nor (>= 31), so no lane gets past this guard and nothing below is recorded */ + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } /* no break: control continues into case 1 */ + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } /* no break: control continues into case 3 */ + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 # same size as expected_bit_patterns: BufferParticipantPattern fails on a size mismatch, and the shader records nothing + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756406086627860527_816_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756406086627860527_816_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..027389cd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756406086627860527_816_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,233 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Records wave-op participation: each guarded block reserves three consecutive uints through InterlockedAdd on _wave_op_index[0], then stores a tag word followed by ballot.x and ballot.y of WaveActiveBallot(1). Tag layout: (op id << 6) | (outer loop counter << 4) | (nested loop counter << 2). */ + uint result = 0; + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 8))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 3))) { /* no lane is in both this set and the enclosing one, so op 37 is never recorded */ + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((67 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((77 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((91 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((98 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((117 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { /* last statement of the loop body, so this continue skips nothing */ + continue; + } + } + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = ((127 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { /* only odd lanes are inside the enclosing branch, so this even-lane guard never passes */ + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((179 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { /* last statement of the loop body, so this continue skips nothing */ + continue; + } + } + } else { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 20))) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((237 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((258 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 276 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4304, 256, 0, 4308, 256, 0, 4312, 256, 0, 4320, 256, 0, 4324, 256, 0, 4328, 256, 0, 4336, 256, 0, 4340, 256, 0, 4344, 256, 0, 5840, 64, 0, 5844, 64, 0, 5848, 64, 0, 5856, 64, 0, 5860, 64, 0, 5864, 64, 0, 5872, 64, 0, 5876, 64, 0, 5880, 64, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 
0, 9024, 2863311530, 0, 9024, 2863311530, 0, 10368, 2852126722, 0, 10368, 2852126722, 0, 10368, 2852126722, 0, 10368, 2852126722, 0, 10368, 2852126722, 0, 10384, 2852126722, 0, 10384, 2852126722, 0, 10384, 2852126722, 0, 10384, 2852126722, 0, 10384, 2852126722, 0, 12608, 4194320, 0, 12608, 4194320, 0, 4304, 256, 0, 4308, 256, 0, 4312, 256, 0, 4320, 256, 0, 4324, 256, 0, 4328, 256, 0, 4336, 256, 0, 4340, 256, 0, 4344, 256, 0, 5840, 64, 0, 5844, 64, 0, 5848, 64, 0, 5856, 64, 0, 5860, 64, 0, 5864, 64, 0, 5872, 64, 0, 5876, 64, 0, 5880, 64, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 9024, 2863311530, 0, 10368, 2852126722, 0, 10368, 2852126722, 0, 10368, 2852126722, 0, 10368, 2852126722, 0, 10368, 2852126722, 0, 10384, 2852126722, 0, 10384, 2852126722, 0, 10384, 2852126722, 0, 10384, 2852126722, 0, 10384, 2852126722, 0, 12608, 4194320, 0, 12608, 4194320, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756406171374206445_818_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756406171374206445_818_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..954aceea --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756406171374206445_818_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,217 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Records wave-op participation: each guarded block reserves three consecutive uints through InterlockedAdd on _wave_op_index[0], then stores a tag word followed by ballot.x and ballot.y of WaveActiveBallot(1). Tag layout: (op id << 6) | (outer loop counter << 4) | (nested loop counter << 2). */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((30 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } /* no break: control continues into case 1 */ + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { /* lane % 2 is 0 or 1, so default is never selected */ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { /* counter0 is 1 at the end of the first pass, so the outer while body runs once */ + break; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 10))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } /* no break: control continues into case 1 */ + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } /* no break: control continues into case 2 */ + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { /* lane % 4 is 0..3 and every value has a case, so default is never selected */ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + default: { /* lane % 2 is 0 or 1, so default is never selected */ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 396 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 4, 0, 1940, 4194304, 0, 1944, 4194304, 0, 1948, 4194304, 0, 2580, 85, 0, 2580, 85, 0, 2580, 85, 0, 2580, 85, 
0, 2584, 85, 0, 2584, 85, 0, 2584, 85, 0, 2584, 85, 0, 2588, 85, 0, 2588, 85, 0, 2588, 85, 0, 2588, 85, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 4432, 65536, 0, 6848, 32, 0, 1040, 4, 0, 1940, 4194304, 0, 1944, 4194304, 0, 1948, 4194304, 0, 2580, 85, 0, 2580, 85, 0, 2580, 85, 0, 2580, 85, 0, 2584, 85, 0, 2584, 85, 0, 2584, 85, 0, 2584, 85, 0, 2588, 85, 0, 2588, 85, 0, 2588, 85, 0, 2588, 85, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3156, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 3160, 1431655765, 0, 
3160, 1431655765, 0, 3160, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 3164, 1431655765, 0, 4432, 65536, 0, 6848, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756406192034505963_820_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756406192034505963_820_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f8a85b1f --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756406192034505963_820_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,121 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 19))) { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 26))) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 27))) 
{ + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5440, 512, 0, 5456, 512, 0, 5472, 512, 0, 6272, 512, 0, 6288, 512, 0, 6304, 512, 0, 5440, 512, 0, 5456, 512, 0, 5472, 512, 0, 6272, 512, 0, 6288, 512, 0, 6304, 512, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756406513987600720_822_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756406513987600720_822_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..885a7638 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756406513987600720_822_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,303 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 15)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 
10)) { + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 25))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 22))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 25))) { + result = (result + 
WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 22))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 31))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((267 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 432 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [14912, 603979776, 0, 14912, 603979776, 0, 14928, 603979776, 0, 14928, 603979776, 0, 16128, 1050624, 0, 16128, 1050624, 0, 16144, 1050624, 0, 16144, 1050624, 0, 18304, 4, 0, 18320, 4, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6400, 43690, 0, 6400, 43690, 0, 6400, 43690, 0, 6400, 43690, 0, 6400, 43690, 0, 6400, 43690, 0, 6400, 43690, 0, 6400, 43690, 0, 7616, 73, 
0, 7616, 73, 0, 7616, 73, 0, 11968, 2449473536, 0, 11968, 2449473536, 0, 11968, 2449473536, 0, 12800, 2449473536, 0, 12800, 2449473536, 0, 12800, 2449473536, 0, 1216, 1, 0, 2512, 2453667986, 0, 2512, 2453667986, 0, 2512, 2453667986, 0, 2512, 2453667986, 0, 2512, 2453667986, 0, 2512, 2453667986, 0, 2512, 2453667986, 0, 2528, 2453667986, 0, 2528, 2453667986, 0, 2528, 2453667986, 0, 2528, 2453667986, 0, 2528, 2453667986, 0, 2528, 2453667986, 0, 2528, 2453667986, 0, 5200, 268500992, 0, 5200, 268500992, 0, 5216, 268500992, 0, 5216, 268500992, 0, 5504, 613566756, 0, 5504, 613566756, 0, 5504, 613566756, 0, 5504, 613566756, 0, 5504, 613566756, 0, 5504, 613566756, 0, 5504, 613566756, 0, 5504, 613566756, 0, 5504, 613566756, 0, 5504, 613566756, 0, 14912, 603979776, 0, 14912, 603979776, 0, 14928, 603979776, 0, 14928, 603979776, 0, 16128, 1050624, 0, 16128, 1050624, 0, 16144, 1050624, 0, 16144, 1050624, 0, 18304, 4, 0, 18320, 4, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6784, 4294901760, 0, 6400, 43690, 0, 6400, 43690, 0, 6400, 43690, 0, 6400, 43690, 0, 6400, 43690, 0, 6400, 43690, 0, 6400, 43690, 0, 6400, 43690, 0, 7616, 73, 0, 7616, 73, 0, 7616, 73, 0, 11968, 2449473536, 0, 11968, 2449473536, 0, 11968, 2449473536, 0, 12800, 2449473536, 0, 12800, 2449473536, 0, 12800, 2449473536, 0, 1216, 1, 0, 2512, 2453667986, 0, 2512, 2453667986, 0, 2512, 2453667986, 0, 2512, 2453667986, 0, 2512, 2453667986, 0, 2512, 2453667986, 0, 2512, 2453667986, 0, 2528, 2453667986, 0, 2528, 2453667986, 0, 2528, 2453667986, 0, 2528, 2453667986, 0, 2528, 2453667986, 0, 2528, 2453667986, 0, 2528, 2453667986, 0, 5200, 268500992, 0, 5200, 268500992, 0, 5216, 268500992, 0, 5216, 268500992, 0, 5504, 613566756, 0, 5504, 613566756, 0, 5504, 
613566756, 0, 5504, 613566756, 0, 5504, 613566756, 0, 5504, 613566756, 0, 5504, 613566756, 0, 5504, 613566756, 0, 5504, 613566756, 0, 5504, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756406523400518151_823_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756406523400518151_823_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..19128e32 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756406523400518151_823_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,286 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 25)) { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 24))) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 420 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3584, 8192, 0, 5696, 3758096385, 0, 5696, 3758096385, 0, 5696, 3758096385, 0, 5696, 3758096385, 0, 5712, 3758096385, 0, 5712, 3758096385, 0, 5712, 3758096385, 0, 5712, 3758096385, 0, 6400, 3758096385, 0, 6400, 3758096385, 0, 6400, 3758096385, 0, 6400, 3758096385, 0, 6416, 3758096385, 0, 6416, 3758096385, 0, 6416, 3758096385, 0, 6416, 3758096385, 0, 7424, 520093696, 0, 7424, 520093696, 0, 7424, 520093696, 0, 7424, 520093696, 0, 7424, 520093696, 0, 7440, 520093696, 0, 7440, 520093696, 0, 7440, 520093696, 0, 7440, 520093696, 0, 7440, 520093696, 0, 8512, 17, 0, 8512, 17, 0, 9088, 286331153, 0, 9088, 286331153, 0, 9088, 286331153, 0, 9088, 286331153, 0, 9088, 286331153, 0, 9088, 286331153, 0, 9088, 286331153, 0, 9088, 286331153, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 
2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 10176, 1, 0, 10816, 17, 0, 10816, 17, 0, 11712, 4, 0, 12160, 8, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3584, 8192, 0, 5696, 3758096385, 0, 5696, 3758096385, 0, 5696, 3758096385, 0, 5696, 3758096385, 0, 5712, 3758096385, 0, 5712, 3758096385, 0, 5712, 3758096385, 0, 5712, 3758096385, 0, 6400, 3758096385, 0, 6400, 3758096385, 0, 6400, 3758096385, 0, 6400, 3758096385, 0, 6416, 3758096385, 0, 6416, 3758096385, 0, 6416, 3758096385, 0, 6416, 3758096385, 0, 7424, 520093696, 0, 7424, 520093696, 0, 7424, 520093696, 0, 7424, 520093696, 0, 7424, 520093696, 0, 7440, 520093696, 0, 7440, 520093696, 0, 7440, 520093696, 0, 7440, 520093696, 0, 7440, 520093696, 0, 8512, 17, 0, 8512, 17, 0, 9088, 286331153, 0, 9088, 286331153, 0, 9088, 286331153, 0, 9088, 286331153, 0, 9088, 286331153, 0, 9088, 286331153, 0, 9088, 286331153, 0, 9088, 286331153, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 9408, 2004318071, 0, 10176, 1, 0, 10816, 17, 0, 10816, 17, 0, 11712, 4, 0, 12160, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756406535727162324_824_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756406535727162324_824_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e33050d3 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756406535727162324_824_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,355 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + 
WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + 
switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 14)) { + if ((WaveGetLaneIndex() == 30)) { + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if 
((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((169 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 20))) { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 25))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 30)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((282 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (292 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 276 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1664, 85, 0, 1664, 85, 0, 1664, 85, 0, 1664, 85, 0, 4672, 85, 0, 4672, 85, 0, 4672, 85, 0, 4672, 85, 0, 6144, 17, 0, 6144, 17, 0, 7552, 2, 0, 8768, 131072, 0, 9808, 572653568, 0, 9808, 572653568, 0, 9808, 572653568, 0, 9808, 572653568, 0, 9824, 572653568, 0, 9824, 572653568, 0, 9824, 572653568, 0, 9824, 572653568, 0, 12544, 1145324612, 0, 12544, 1145324612, 0, 12544, 1145324612, 0, 12544, 1145324612, 0, 12544, 1145324612, 0, 12544, 1145324612, 0, 12544, 1145324612, 0, 12544, 1145324612, 0, 16320, 2048, 0, 19136, 134217728, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1024, 1431655765, 0, 1664, 85, 0, 1664, 85, 0, 1664, 85, 0, 1664, 85, 0, 4672, 85, 0, 4672, 85, 0, 4672, 85, 0, 4672, 85, 0, 6144, 17, 0, 6144, 17, 0, 7552, 2, 0, 8768, 131072, 0, 9808, 572653568, 0, 9808, 572653568, 0, 9808, 572653568, 0, 9808, 572653568, 0, 9824, 572653568, 0, 9824, 572653568, 0, 9824, 572653568, 0, 9824, 572653568, 0, 12544, 1145324612, 0, 12544, 1145324612, 0, 
12544, 1145324612, 0, 12544, 1145324612, 0, 12544, 1145324612, 0, 12544, 1145324612, 0, 12544, 1145324612, 0, 12544, 1145324612, 0, 16320, 2048, 0, 19136, 134217728, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756406541549521628_825_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756406541549521628_825_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8c96e0c3 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756406541549521628_825_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,348 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 14)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 25))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 31))) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: 
main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 306 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1808, 4369, 0, 1808, 4369, 0, 1808, 4369, 0, 1808, 4369, 0, 1824, 4369, 0, 1824, 4369, 0, 1824, 4369, 0, 1824, 4369, 0, 2240, 17, 0, 2240, 17, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 4288, 2281701376, 0, 4288, 2281701376, 0, 6400, 73, 0, 6400, 73, 0, 6400, 73, 0, 6976, 272696336, 0, 6976, 272696336, 0, 6976, 272696336, 0, 6976, 272696336, 0, 6976, 272696336, 0, 12800, 17, 0, 12800, 17, 0, 13696, 1145324612, 0, 13696, 1145324612, 0, 13696, 1145324612, 0, 13696, 1145324612, 0, 13696, 1145324612, 0, 13696, 1145324612, 0, 13696, 1145324612, 0, 13696, 1145324612, 0, 14144, 559240, 0, 14144, 559240, 0, 14144, 559240, 0, 14144, 559240, 0, 14144, 559240, 0, 1808, 4369, 0, 1808, 4369, 0, 1808, 4369, 0, 1808, 4369, 0, 1824, 4369, 0, 1824, 4369, 0, 1824, 4369, 0, 1824, 4369, 0, 2240, 17, 0, 2240, 17, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 3136, 1717986918, 0, 4288, 2281701376, 0, 4288, 2281701376, 0, 6400, 73, 0, 6400, 73, 0, 6400, 73, 0, 6976, 272696336, 0, 6976, 272696336, 0, 6976, 272696336, 0, 6976, 272696336, 0, 6976, 272696336, 0, 12800, 17, 0, 12800, 17, 0, 13696, 1145324612, 0, 13696, 1145324612, 0, 13696, 1145324612, 0, 13696, 1145324612, 0, 13696, 1145324612, 0, 13696, 1145324612, 
0, 13696, 1145324612, 0, 13696, 1145324612, 0, 14144, 559240, 0, 14144, 559240, 0, 14144, 559240, 0, 14144, 559240, 0, 14144, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756406554345179993_826_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756406554345179993_826_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6e7f0889 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756406554345179993_826_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,121 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 11))) { + result 
= (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 27))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((87 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((96 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 2574 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 2692, 33560578, 0, 2692, 33560578, 0, 2692, 33560578, 0, 2692, 33560578, 0, 2696, 33560578, 0, 2696, 33560578, 0, 2696, 33560578, 0, 2696, 33560578, 0, 2700, 33560578, 0, 2700, 33560578, 0, 2700, 33560578, 0, 2700, 33560578, 0, 2708, 33560578, 0, 2708, 33560578, 0, 2708, 33560578, 0, 2708, 33560578, 0, 2712, 33560578, 0, 2712, 33560578, 0, 2712, 33560578, 0, 2712, 33560578, 0, 2716, 33560578, 0, 2716, 33560578, 0, 2716, 33560578, 0, 2716, 33560578, 0, 2724, 33560578, 0, 2724, 33560578, 0, 2724, 33560578, 0, 2724, 33560578, 0, 2728, 33560578, 0, 2728, 33560578, 0, 2728, 33560578, 0, 2728, 33560578, 0, 2732, 33560578, 0, 2732, 33560578, 0, 2732, 33560578, 0, 2732, 33560578, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 
1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 
1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 4740, 1342177285, 0, 4740, 1342177285, 0, 4740, 1342177285, 0, 4740, 1342177285, 0, 4744, 1342177285, 0, 4744, 1342177285, 0, 4744, 1342177285, 0, 4744, 1342177285, 0, 4748, 1342177285, 0, 4748, 1342177285, 0, 4748, 1342177285, 0, 4748, 1342177285, 0, 4756, 1342177285, 0, 4756, 1342177285, 0, 4756, 1342177285, 0, 4756, 1342177285, 0, 4760, 1342177285, 0, 4760, 1342177285, 0, 4760, 1342177285, 0, 4760, 1342177285, 0, 4764, 1342177285, 0, 4764, 1342177285, 0, 4764, 1342177285, 0, 4764, 1342177285, 0, 4772, 1342177285, 0, 4772, 1342177285, 0, 4772, 1342177285, 0, 4772, 1342177285, 0, 4776, 1342177285, 0, 4776, 1342177285, 0, 4776, 1342177285, 0, 4776, 1342177285, 0, 4780, 1342177285, 0, 4780, 1342177285, 0, 4780, 1342177285, 0, 4780, 1342177285, 0, 5572, 1342177281, 0, 5572, 1342177281, 0, 5572, 1342177281, 0, 5576, 1342177281, 0, 5576, 1342177281, 0, 5576, 1342177281, 0, 5580, 1342177281, 0, 5580, 1342177281, 0, 5580, 1342177281, 0, 5588, 1342177281, 0, 5588, 1342177281, 0, 5588, 1342177281, 0, 5592, 1342177281, 0, 5592, 1342177281, 0, 5592, 1342177281, 0, 5596, 1342177281, 0, 5596, 1342177281, 0, 5596, 1342177281, 0, 5604, 1342177281, 0, 5604, 1342177281, 0, 5604, 1342177281, 0, 5608, 1342177281, 0, 5608, 
1342177281, 0, 5608, 1342177281, 0, 5612, 1342177281, 0, 5612, 1342177281, 0, 5612, 1342177281, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 
1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 896, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 912, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 928, 4294705152, 0, 2692, 33560578, 0, 2692, 
33560578, 0, 2692, 33560578, 0, 2692, 33560578, 0, 2696, 33560578, 0, 2696, 33560578, 0, 2696, 33560578, 0, 2696, 33560578, 0, 2700, 33560578, 0, 2700, 33560578, 0, 2700, 33560578, 0, 2700, 33560578, 0, 2708, 33560578, 0, 2708, 33560578, 0, 2708, 33560578, 0, 2708, 33560578, 0, 2712, 33560578, 0, 2712, 33560578, 0, 2712, 33560578, 0, 2712, 33560578, 0, 2716, 33560578, 0, 2716, 33560578, 0, 2716, 33560578, 0, 2716, 33560578, 0, 2724, 33560578, 0, 2724, 33560578, 0, 2724, 33560578, 0, 2724, 33560578, 0, 2728, 33560578, 0, 2728, 33560578, 0, 2728, 33560578, 0, 2728, 33560578, 0, 2732, 33560578, 0, 2732, 33560578, 0, 2732, 33560578, 0, 2732, 33560578, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3588, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3592, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3596, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 0, 3604, 1431655765, 
0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3608, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3612, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3620, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3624, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 3628, 1431655765, 0, 4740, 1342177285, 0, 4740, 1342177285, 0, 4740, 1342177285, 0, 4740, 1342177285, 0, 4744, 1342177285, 0, 4744, 1342177285, 0, 4744, 1342177285, 0, 4744, 1342177285, 0, 4748, 1342177285, 0, 4748, 1342177285, 0, 4748, 1342177285, 0, 4748, 1342177285, 0, 4756, 1342177285, 0, 4756, 1342177285, 0, 4756, 1342177285, 0, 
4756, 1342177285, 0, 4760, 1342177285, 0, 4760, 1342177285, 0, 4760, 1342177285, 0, 4760, 1342177285, 0, 4764, 1342177285, 0, 4764, 1342177285, 0, 4764, 1342177285, 0, 4764, 1342177285, 0, 4772, 1342177285, 0, 4772, 1342177285, 0, 4772, 1342177285, 0, 4772, 1342177285, 0, 4776, 1342177285, 0, 4776, 1342177285, 0, 4776, 1342177285, 0, 4776, 1342177285, 0, 4780, 1342177285, 0, 4780, 1342177285, 0, 4780, 1342177285, 0, 4780, 1342177285, 0, 5572, 1342177281, 0, 5572, 1342177281, 0, 5572, 1342177281, 0, 5576, 1342177281, 0, 5576, 1342177281, 0, 5576, 1342177281, 0, 5580, 1342177281, 0, 5580, 1342177281, 0, 5580, 1342177281, 0, 5588, 1342177281, 0, 5588, 1342177281, 0, 5588, 1342177281, 0, 5592, 1342177281, 0, 5592, 1342177281, 0, 5592, 1342177281, 0, 5596, 1342177281, 0, 5596, 1342177281, 0, 5596, 1342177281, 0, 5604, 1342177281, 0, 5604, 1342177281, 0, 5604, 1342177281, 0, 5608, 1342177281, 0, 5608, 1342177281, 0, 5608, 1342177281, 0, 5612, 1342177281, 0, 5612, 1342177281, 0, 5612, 1342177281, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6148, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6152, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 6156, 1431655765, 0, 
6156, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6164, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6168, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6172, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6180, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6184, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 6188, 1431655765, 0, 
6188, 1431655765, 0, 6188, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756406848523304897_827_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756406848523304897_827_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dd3d015b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756406848523304897_827_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,282 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() 
< 9) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + } else { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 5))) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((189 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (266 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [8896, 8192, 0, 8912, 8192, 0, 8928, 8192, 0, 9664, 613566756, 0, 9664, 613566756, 0, 9664, 613566756, 0, 9664, 613566756, 0, 9664, 613566756, 0, 9664, 613566756, 0, 9664, 613566756, 0, 9664, 613566756, 0, 9664, 613566756, 0, 9664, 613566756, 0, 12544, 16, 0, 12560, 16, 0, 13120, 286331153, 0, 13120, 286331153, 0, 13120, 286331153, 0, 13120, 286331153, 0, 13120, 286331153, 0, 13120, 286331153, 0, 13120, 286331153, 0, 13120, 286331153, 0, 13440, 1145324612, 0, 13440, 1145324612, 0, 13440, 1145324612, 0, 13440, 1145324612, 0, 13440, 1145324612, 0, 13440, 1145324612, 0, 13440, 1145324612, 0, 13440, 1145324612, 0, 14592, 2290089992, 0, 14592, 2290089992, 0, 14592, 2290089992, 0, 14592, 2290089992, 0, 16768, 8388608, 0, 8896, 8192, 0, 8912, 8192, 0, 8928, 8192, 0, 9664, 613566756, 0, 9664, 613566756, 0, 9664, 613566756, 0, 9664, 613566756, 0, 9664, 613566756, 0, 9664, 613566756, 0, 9664, 613566756, 0, 9664, 613566756, 0, 9664, 613566756, 0, 9664, 613566756, 0, 12544, 16, 0, 12560, 16, 0, 13120, 286331153, 0, 13120, 286331153, 0, 13120, 286331153, 0, 13120, 286331153, 0, 13120, 286331153, 0, 13120, 286331153, 0, 13120, 
286331153, 0, 13120, 286331153, 0, 13440, 1145324612, 0, 13440, 1145324612, 0, 13440, 1145324612, 0, 13440, 1145324612, 0, 13440, 1145324612, 0, 13440, 1145324612, 0, 13440, 1145324612, 0, 13440, 1145324612, 0, 14592, 2290089992, 0, 14592, 2290089992, 0, 14592, 2290089992, 0, 14592, 2290089992, 0, 16768, 8388608, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756406884562029643_828_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756406884562029643_828_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..64e7a55b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756406884562029643_828_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,374 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 16)) { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((159 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((170 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 21))) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 28))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((267 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((295 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if 
(((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (330 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 276 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 8, 0, 2112, 545392672, 0, 2112, 545392672, 0, 2112, 545392672, 0, 2112, 545392672, 0, 2112, 545392672, 0, 2752, 85, 0, 2752, 85, 0, 2752, 85, 0, 2752, 85, 0, 3392, 73, 0, 3392, 73, 0, 3392, 73, 0, 3968, 272696336, 0, 3968, 272696336, 0, 3968, 272696336, 0, 3968, 272696336, 0, 
3968, 272696336, 0, 5568, 613566756, 0, 5568, 613566756, 0, 5568, 613566756, 0, 5568, 613566756, 0, 5568, 613566756, 0, 5568, 613566756, 0, 5568, 613566756, 0, 5568, 613566756, 0, 5568, 613566756, 0, 5568, 613566756, 0, 14400, 1048576, 0, 14416, 1048576, 0, 18880, 1048576, 0, 18896, 1048576, 0, 21120, 838860, 0, 21120, 838860, 0, 21120, 838860, 0, 21120, 838860, 0, 21120, 838860, 0, 21120, 838860, 0, 21120, 838860, 0, 21120, 838860, 0, 21120, 838860, 0, 21120, 838860, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 8, 0, 2112, 545392672, 0, 2112, 545392672, 0, 2112, 545392672, 0, 2112, 545392672, 0, 2112, 545392672, 0, 2752, 85, 0, 2752, 85, 0, 2752, 85, 0, 2752, 85, 0, 3392, 73, 0, 3392, 73, 0, 3392, 73, 0, 3968, 272696336, 0, 3968, 272696336, 0, 3968, 272696336, 0, 3968, 272696336, 0, 3968, 272696336, 0, 5568, 613566756, 0, 5568, 613566756, 0, 5568, 613566756, 0, 5568, 613566756, 0, 5568, 613566756, 0, 5568, 613566756, 0, 5568, 613566756, 0, 5568, 613566756, 0, 5568, 613566756, 0, 5568, 613566756, 0, 14400, 1048576, 0, 14416, 1048576, 0, 18880, 1048576, 0, 18896, 1048576, 0, 21120, 838860, 0, 21120, 838860, 0, 21120, 838860, 0, 21120, 838860, 0, 21120, 838860, 0, 21120, 838860, 0, 21120, 838860, 0, 21120, 838860, 0, 21120, 838860, 0, 21120, 838860, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756406899615757286_829_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756406899615757286_829_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..738e6482 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756406899615757286_829_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,235 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 8)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 19)) { + result = (result + 
WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 21))) { + if ((WaveGetLaneIndex() == 0)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 384 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2004, 2097664, 0, 2004, 2097664, 0, 2008, 2097664, 0, 2008, 2097664, 0, 2020, 2097664, 0, 2020, 2097664, 0, 2024, 2097664, 0, 2024, 2097664, 0, 2036, 2097664, 0, 2036, 2097664, 0, 2040, 2097664, 0, 2040, 2097664, 0, 3220, 17039872, 0, 3220, 17039872, 0, 3220, 17039872, 0, 3224, 17039872, 0, 3224, 17039872, 0, 3224, 17039872, 0, 3236, 17039872, 0, 3236, 17039872, 0, 3236, 17039872, 0, 3240, 17039872, 0, 3240, 17039872, 0, 3240, 17039872, 0, 3252, 17039872, 0, 3252, 17039872, 0, 3252, 17039872, 0, 3256, 17039872, 0, 3256, 17039872, 0, 3256, 17039872, 0, 6016, 613566756, 0, 6016, 613566756, 0, 6016, 613566756, 0, 6016, 613566756, 0, 6016, 613566756, 0, 6016, 613566756, 0, 6016, 613566756, 0, 6016, 613566756, 0, 6016, 613566756, 0, 6016, 613566756, 0, 7488, 4, 0, 7232, 1, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 9344, 1, 0, 11648, 4227858434, 0, 11648, 4227858434, 0, 11648, 4227858434, 0, 11648, 4227858434, 0, 11648, 4227858434, 0, 11648, 4227858434, 0, 11648, 4227858434, 0, 11648, 1, 0, 2004, 2097664, 0, 2004, 2097664, 0, 2008, 2097664, 0, 2008, 2097664, 
0, 2020, 2097664, 0, 2020, 2097664, 0, 2024, 2097664, 0, 2024, 2097664, 0, 2036, 2097664, 0, 2036, 2097664, 0, 2040, 2097664, 0, 2040, 2097664, 0, 3220, 17039872, 0, 3220, 17039872, 0, 3220, 17039872, 0, 3224, 17039872, 0, 3224, 17039872, 0, 3224, 17039872, 0, 3236, 17039872, 0, 3236, 17039872, 0, 3236, 17039872, 0, 3240, 17039872, 0, 3240, 17039872, 0, 3240, 17039872, 0, 3252, 17039872, 0, 3252, 17039872, 0, 3252, 17039872, 0, 3256, 17039872, 0, 3256, 17039872, 0, 3256, 17039872, 0, 6016, 613566756, 0, 6016, 613566756, 0, 6016, 613566756, 0, 6016, 613566756, 0, 6016, 613566756, 0, 6016, 613566756, 0, 6016, 613566756, 0, 6016, 613566756, 0, 6016, 613566756, 0, 6016, 613566756, 0, 7488, 4, 0, 7232, 1, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 6848, 4294443008, 0, 9344, 1, 0, 11648, 4227858434, 0, 11648, 4227858434, 0, 11648, 4227858434, 0, 11648, 4227858434, 0, 11648, 4227858434, 0, 11648, 4227858434, 0, 11648, 4227858434, 0, 11648, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756406936161369821_830_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756406936161369821_830_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dec43384 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756406936161369821_830_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,440 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 20)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + 
case 0: { + if ((WaveGetLaneIndex() >= 30)) { + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 20))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 16)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28))) { + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((292 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((315 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (332 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24))) { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (358 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((377 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (392 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (401 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (405 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 444 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 6656, 613566756, 0, 6656, 613566756, 0, 6656, 613566756, 0, 6656, 613566756, 0, 6656, 613566756, 0, 6656, 613566756, 0, 6656, 613566756, 0, 6656, 613566756, 0, 6656, 613566756, 0, 6656, 613566756, 0, 8512, 21845, 0, 8512, 21845, 0, 8512, 21845, 0, 8512, 21845, 0, 8512, 21845, 0, 8512, 21845, 0, 8512, 21845, 0, 8512, 21845, 0, 8528, 21845, 0, 8528, 21845, 0, 8528, 21845, 0, 8528, 21845, 0, 8528, 21845, 0, 8528, 21845, 0, 8528, 21845, 0, 8528, 21845, 0, 8544, 21845, 0, 8544, 21845, 0, 8544, 21845, 0, 8544, 21845, 0, 8544, 21845, 0, 8544, 21845, 0, 8544, 21845, 0, 8544, 21845, 0, 10112, 21845, 0, 10112, 21845, 0, 10112, 21845, 0, 10112, 21845, 0, 10112, 21845, 0, 10112, 21845, 0, 10112, 21845, 0, 10112, 21845, 0, 10128, 21845, 0, 10128, 21845, 0, 10128, 21845, 0, 10128, 21845, 0, 10128, 21845, 0, 10128, 21845, 0, 10128, 
21845, 0, 10128, 21845, 0, 10144, 21845, 0, 10144, 21845, 0, 10144, 21845, 0, 10144, 21845, 0, 10144, 21845, 0, 10144, 21845, 0, 10144, 21845, 0, 10144, 21845, 0, 14144, 1, 0, 15808, 64, 0, 16960, 272696336, 0, 16960, 272696336, 0, 16960, 272696336, 0, 16960, 272696336, 0, 16960, 272696336, 0, 18688, 4, 0, 18704, 4, 0, 18720, 4, 0, 20160, 4, 0, 20176, 4, 0, 20192, 4, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 6656, 613566756, 0, 6656, 613566756, 0, 6656, 613566756, 0, 6656, 613566756, 0, 6656, 613566756, 0, 6656, 613566756, 0, 6656, 613566756, 0, 6656, 613566756, 0, 6656, 613566756, 0, 6656, 613566756, 0, 8512, 21845, 0, 8512, 21845, 0, 8512, 21845, 0, 8512, 21845, 0, 8512, 21845, 0, 8512, 21845, 0, 8512, 21845, 0, 8512, 21845, 0, 8528, 21845, 0, 8528, 21845, 0, 8528, 21845, 0, 8528, 21845, 0, 8528, 21845, 0, 8528, 21845, 0, 8528, 21845, 0, 8528, 21845, 0, 8544, 21845, 0, 8544, 21845, 0, 8544, 21845, 0, 8544, 21845, 0, 8544, 21845, 0, 8544, 21845, 0, 8544, 21845, 0, 8544, 21845, 0, 10112, 21845, 0, 10112, 21845, 0, 10112, 21845, 0, 10112, 21845, 0, 10112, 21845, 0, 10112, 21845, 0, 10112, 21845, 0, 10112, 21845, 0, 10128, 21845, 0, 10128, 21845, 0, 10128, 21845, 0, 10128, 21845, 0, 10128, 21845, 0, 10128, 21845, 0, 10128, 21845, 0, 10128, 21845, 0, 10144, 21845, 0, 10144, 21845, 0, 10144, 21845, 0, 10144, 21845, 0, 10144, 21845, 0, 10144, 21845, 0, 10144, 21845, 0, 10144, 21845, 0, 14144, 1, 0, 15808, 64, 0, 16960, 272696336, 0, 16960, 272696336, 0, 16960, 272696336, 0, 16960, 272696336, 0, 16960, 272696336, 0, 18688, 4, 0, 18704, 4, 0, 18720, 4, 0, 20160, 4, 0, 20176, 4, 0, 20192, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: 
_wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756406979338858213_831_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756406979338858213_831_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ed25788a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756406979338858213_831_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,148 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 8)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27))) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756406979592060811_832_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756406979592060811_832_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6fad6630 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756406979592060811_832_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,170 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else 
{ + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 22)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 5248, 570425344, 0, 5248, 570425344, 0, 5568, 1145324612, 0, 5568, 1145324612, 0, 5568, 1145324612, 0, 5568, 1145324612, 0, 5568, 1145324612, 0, 5568, 1145324612, 0, 5568, 1145324612, 0, 5568, 1145324612, 0, 6016, 838860, 0, 6016, 838860, 0, 6016, 838860, 0, 6016, 838860, 0, 6016, 838860, 0, 6016, 838860, 0, 6016, 838860, 0, 6016, 838860, 0, 6016, 838860, 0, 6016, 838860, 0, 576, 17, 0, 576, 17, 0, 5248, 570425344, 0, 5248, 570425344, 0, 5568, 1145324612, 0, 5568, 1145324612, 0, 5568, 1145324612, 0, 5568, 1145324612, 0, 5568, 1145324612, 0, 5568, 1145324612, 0, 5568, 1145324612, 0, 5568, 1145324612, 0, 6016, 838860, 0, 6016, 838860, 0, 6016, 838860, 0, 6016, 838860, 0, 6016, 838860, 0, 6016, 838860, 0, 6016, 838860, 0, 6016, 838860, 0, 6016, 838860, 0, 6016, 838860, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + 
Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756407012178212844_836_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756407012178212844_836_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ae2ae6a1 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756407012178212844_836_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,674 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 9))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 
8) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 13))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 3: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if 
((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 31))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((304 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((311 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((320 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (327 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (343 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + 
case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (353 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (362 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((377 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (389 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (402 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((419 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((428 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 27))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (465 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (484 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (493 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (498 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (507 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((532 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((543 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (564 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + 
result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (575 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (584 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 21)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (596 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((613 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i8 = 0; (i8 < 2); i8 = (i8 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((630 << 6) | (i7 << 4)) | (i8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((639 << 6) 
| (i7 << 4)) | (i8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((648 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (683 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (694 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (724 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (743 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (748 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (752 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 498 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1168, 1, 0, 1184, 1, 0, 5392, 285212672, 0, 5392, 285212672, 0, 5408, 285212672, 0, 5408, 285212672, 0, 5840, 286261248, 0, 5840, 286261248, 0, 5840, 286261248, 0, 5856, 286261248, 0, 5856, 286261248, 0, 5856, 286261248, 0, 8000, 8192, 0, 9936, 33554432, 0, 9952, 33554432, 0, 9968, 33554432, 0, 15504, 102, 0, 15504, 102, 0, 15504, 102, 0, 15504, 102, 0, 15520, 102, 0, 15520, 102, 0, 15520, 102, 0, 15520, 102, 0, 16848, 33554432, 0, 16864, 33554432, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 
0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 22592, 65, 0, 22592, 65, 0, 23168, 272696336, 0, 23168, 272696336, 0, 23168, 272696336, 0, 23168, 272696336, 0, 23168, 272696336, 0, 25728, 73, 0, 25728, 73, 0, 25728, 73, 0, 32448, 272696336, 0, 32448, 272696336, 0, 32448, 272696336, 0, 32448, 272696336, 0, 32448, 272696336, 0, 47872, 613566756, 0, 47872, 613566756, 0, 47872, 613566756, 0, 47872, 613566756, 0, 47872, 613566756, 0, 47872, 613566756, 0, 47872, 613566756, 0, 47872, 613566756, 0, 47872, 613566756, 0, 47872, 613566756, 0, 1168, 1, 0, 1184, 1, 0, 5392, 285212672, 0, 5392, 285212672, 0, 5408, 285212672, 0, 5408, 285212672, 0, 5840, 286261248, 0, 5840, 286261248, 0, 5840, 286261248, 0, 5856, 286261248, 0, 5856, 286261248, 0, 5856, 286261248, 0, 8000, 8192, 0, 9936, 33554432, 0, 9952, 33554432, 0, 9968, 33554432, 0, 15504, 102, 0, 15504, 102, 0, 15504, 102, 0, 15504, 102, 0, 15520, 102, 0, 15520, 102, 0, 15520, 102, 0, 15520, 102, 0, 16848, 33554432, 0, 16864, 33554432, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 20496, 2863311530, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 
1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 21952, 1431655765, 0, 22592, 65, 0, 22592, 65, 0, 23168, 272696336, 0, 23168, 272696336, 0, 23168, 272696336, 0, 23168, 272696336, 0, 23168, 272696336, 0, 25728, 73, 0, 25728, 73, 0, 25728, 73, 0, 32448, 272696336, 0, 32448, 272696336, 0, 32448, 272696336, 0, 32448, 272696336, 0, 32448, 272696336, 0, 47872, 613566756, 0, 47872, 613566756, 0, 47872, 613566756, 0, 47872, 613566756, 0, 47872, 613566756, 0, 47872, 613566756, 0, 47872, 613566756, 0, 47872, 613566756, 0, 47872, 613566756, 0, 47872, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756407180891220467_838_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756407180891220467_838_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6c93e194 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756407180891220467_838_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,288 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27))) { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((55 << 6) | (counter0 << 4)) | (i1 << 2)) | i2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (counter0 << 4)) | 
(i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 27)) { + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || 
(WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 15)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 29))) { + if ((WaveGetLaneIndex() == 25)) { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 24)) { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((307 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 534 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3536, 514, 0, 3536, 514, 0, 3537, 514, 0, 3537, 514, 0, 3538, 514, 0, 3538, 514, 0, 3540, 514, 0, 3540, 514, 0, 3541, 514, 0, 3541, 514, 0, 3542, 514, 0, 3542, 514, 0, 3544, 514, 0, 3544, 514, 0, 3545, 514, 0, 3545, 514, 0, 3546, 514, 0, 3546, 514, 0, 3552, 514, 0, 3552, 514, 0, 3553, 514, 0, 3553, 514, 0, 3554, 514, 0, 3554, 514, 0, 3556, 514, 0, 3556, 514, 0, 3557, 514, 0, 3557, 514, 0, 3558, 514, 0, 3558, 514, 0, 3560, 514, 0, 3560, 514, 0, 3561, 514, 0, 3561, 514, 0, 3562, 514, 0, 3562, 514, 0, 3984, 514, 0, 3984, 514, 0, 3988, 514, 0, 3988, 514, 0, 3992, 514, 0, 3992, 514, 0, 4000, 514, 0, 4000, 514, 0, 4004, 514, 0, 4004, 514, 0, 4008, 514, 0, 4008, 514, 0, 6160, 1082138752, 0, 6160, 1082138752, 0, 6160, 1082138752, 0, 6160, 1082138752, 0, 6176, 1082138752, 
0, 6176, 1082138752, 0, 6176, 1082138752, 0, 6176, 1082138752, 0, 7952, 65536, 0, 7968, 65536, 0, 7984, 65536, 0, 10064, 268439552, 0, 10064, 268439552, 0, 10080, 268439552, 0, 10080, 268439552, 0, 10096, 268439552, 0, 10096, 268439552, 0, 15360, 559240, 0, 15360, 559240, 0, 15360, 559240, 0, 15360, 559240, 0, 15360, 559240, 0, 16256, 7, 0, 16256, 7, 0, 16256, 7, 0, 18496, 15, 0, 18496, 15, 0, 18496, 15, 0, 18496, 15, 0, 19648, 268435456, 0, 19664, 268435456, 0, 19680, 268435456, 0, 20352, 2863300608, 0, 20352, 2863300608, 0, 20352, 2863300608, 0, 20352, 2863300608, 0, 20352, 2863300608, 0, 20352, 2863300608, 0, 20352, 2863300608, 0, 20352, 2863300608, 0, 20352, 2863300608, 0, 3536, 514, 0, 3536, 514, 0, 3537, 514, 0, 3537, 514, 0, 3538, 514, 0, 3538, 514, 0, 3540, 514, 0, 3540, 514, 0, 3541, 514, 0, 3541, 514, 0, 3542, 514, 0, 3542, 514, 0, 3544, 514, 0, 3544, 514, 0, 3545, 514, 0, 3545, 514, 0, 3546, 514, 0, 3546, 514, 0, 3552, 514, 0, 3552, 514, 0, 3553, 514, 0, 3553, 514, 0, 3554, 514, 0, 3554, 514, 0, 3556, 514, 0, 3556, 514, 0, 3557, 514, 0, 3557, 514, 0, 3558, 514, 0, 3558, 514, 0, 3560, 514, 0, 3560, 514, 0, 3561, 514, 0, 3561, 514, 0, 3562, 514, 0, 3562, 514, 0, 3984, 514, 0, 3984, 514, 0, 3988, 514, 0, 3988, 514, 0, 3992, 514, 0, 3992, 514, 0, 4000, 514, 0, 4000, 514, 0, 4004, 514, 0, 4004, 514, 0, 4008, 514, 0, 4008, 514, 0, 6160, 1082138752, 0, 6160, 1082138752, 0, 6160, 1082138752, 0, 6160, 1082138752, 0, 6176, 1082138752, 0, 6176, 1082138752, 0, 6176, 1082138752, 0, 6176, 1082138752, 0, 7952, 65536, 0, 7968, 65536, 0, 7984, 65536, 0, 10064, 268439552, 0, 10064, 268439552, 0, 10080, 268439552, 0, 10080, 268439552, 0, 10096, 268439552, 0, 10096, 268439552, 0, 15360, 559240, 0, 15360, 559240, 0, 15360, 559240, 0, 15360, 559240, 0, 15360, 559240, 0, 16256, 7, 0, 16256, 7, 0, 16256, 7, 0, 18496, 15, 0, 18496, 15, 0, 18496, 15, 0, 18496, 15, 0, 19648, 268435456, 0, 19664, 268435456, 0, 19680, 268435456, 0, 20352, 2863300608, 0, 20352, 2863300608, 0, 20352, 
2863300608, 0, 20352, 2863300608, 0, 20352, 2863300608, 0, 20352, 2863300608, 0, 20352, 2863300608, 0, 20352, 2863300608, 0, 20352, 2863300608, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756407875425268058_843_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756407875425268058_843_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..466c85c5 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756407875425268058_843_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,340 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 17)) { + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((83 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((148 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) 
== 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 3: { + if ((WaveGetLaneIndex() >= 26)) { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || 
(WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((262 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 25))) { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (290 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((309 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (324 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (328 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 204 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3072, 1, 0, 3088, 1, 0, 3104, 1, 0, 8080, 2097664, 0, 8080, 2097664, 0, 8096, 2097664, 0, 8096, 2097664, 0, 9492, 262144, 0, 9508, 262144, 0, 10880, 4195328, 0, 10880, 4195328, 0, 13376, 604127268, 0, 13376, 604127268, 0, 13376, 604127268, 0, 13376, 604127268, 0, 13376, 604127268, 0, 13376, 604127268, 0, 16788, 2147483648, 0, 16792, 2147483648, 0, 16796, 2147483648, 0, 16804, 2147483648, 0, 16808, 2147483648, 0, 16812, 2147483648, 0, 16820, 2147483648, 0, 16824, 2147483648, 0, 16828, 2147483648, 0, 17408, 85, 0, 17408, 85, 0, 17408, 85, 0, 17408, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3072, 1, 0, 3088, 1, 0, 3104, 1, 0, 8080, 2097664, 0, 8080, 2097664, 0, 8096, 2097664, 0, 8096, 2097664, 0, 9492, 262144, 0, 9508, 262144, 0, 10880, 4195328, 0, 10880, 4195328, 0, 13376, 604127268, 0, 13376, 604127268, 0, 13376, 604127268, 0, 13376, 604127268, 0, 13376, 604127268, 0, 13376, 604127268, 0, 16788, 2147483648, 0, 16792, 2147483648, 0, 16796, 2147483648, 0, 16804, 2147483648, 0, 16808, 2147483648, 0, 16812, 2147483648, 0, 16820, 2147483648, 0, 16824, 2147483648, 0, 16828, 2147483648, 0, 17408, 85, 0, 17408, 85, 0, 17408, 85, 0, 17408, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756407925809335558_844_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756407925809335558_844_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7b89c312 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756407925809335558_844_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,325 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 18))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 22))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 29))) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((160 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 18)) { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((262 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((280 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((289 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((293 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((302 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: 
UInt32 + Stride: 4 + Fill: 0 + Size: 486 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3840, 16777217, 0, 3840, 16777217, 0, 6400, 268435456, 0, 6416, 268435456, 0, 6720, 1048832, 0, 6720, 1048832, 0, 8768, 536870912, 0, 8784, 536870912, 0, 8800, 536870912, 0, 10240, 2, 0, 10244, 2, 0, 10256, 2, 0, 10260, 2, 0, 10272, 2, 0, 10276, 2, 0, 14016, 85, 0, 14016, 85, 0, 14016, 85, 0, 14016, 85, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 16768, 537395200, 0, 16768, 537395200, 0, 16784, 537395200, 0, 16784, 537395200, 0, 19328, 2863136768, 0, 19328, 2863136768, 0, 19328, 2863136768, 0, 19328, 2863136768, 0, 19328, 2863136768, 0, 19328, 2863136768, 0, 19328, 2863136768, 0, 19332, 2863136768, 0, 19332, 2863136768, 0, 19332, 2863136768, 0, 19332, 2863136768, 0, 19332, 2863136768, 0, 19332, 2863136768, 0, 19332, 2863136768, 0, 19336, 2863136768, 0, 19336, 2863136768, 0, 19336, 2863136768, 0, 19336, 2863136768, 0, 19336, 2863136768, 0, 19336, 2863136768, 0, 19336, 2863136768, 0, 19344, 2863136768, 0, 19344, 2863136768, 0, 19344, 2863136768, 0, 19344, 2863136768, 0, 19344, 2863136768, 0, 19344, 2863136768, 0, 19344, 2863136768, 0, 19348, 2863136768, 0, 19348, 2863136768, 0, 19348, 2863136768, 0, 19348, 2863136768, 0, 19348, 2863136768, 0, 19348, 2863136768, 0, 19348, 2863136768, 0, 19352, 2863136768, 0, 19352, 2863136768, 0, 19352, 2863136768, 0, 19352, 2863136768, 0, 19352, 2863136768, 0, 19352, 2863136768, 0, 19352, 2863136768, 0, 3840, 16777217, 0, 3840, 16777217, 0, 6400, 268435456, 0, 6416, 268435456, 0, 6720, 1048832, 0, 6720, 1048832, 0, 8768, 536870912, 0, 8784, 536870912, 0, 8800, 536870912, 0, 10240, 2, 0, 10244, 2, 0, 10256, 2, 0, 10260, 
2, 0, 10272, 2, 0, 10276, 2, 0, 14016, 85, 0, 14016, 85, 0, 14016, 85, 0, 14016, 85, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 14592, 1431655765, 0, 16768, 537395200, 0, 16768, 537395200, 0, 16784, 537395200, 0, 16784, 537395200, 0, 19328, 2863136768, 0, 19328, 2863136768, 0, 19328, 2863136768, 0, 19328, 2863136768, 0, 19328, 2863136768, 0, 19328, 2863136768, 0, 19328, 2863136768, 0, 19332, 2863136768, 0, 19332, 2863136768, 0, 19332, 2863136768, 0, 19332, 2863136768, 0, 19332, 2863136768, 0, 19332, 2863136768, 0, 19332, 2863136768, 0, 19336, 2863136768, 0, 19336, 2863136768, 0, 19336, 2863136768, 0, 19336, 2863136768, 0, 19336, 2863136768, 0, 19336, 2863136768, 0, 19336, 2863136768, 0, 19344, 2863136768, 0, 19344, 2863136768, 0, 19344, 2863136768, 0, 19344, 2863136768, 0, 19344, 2863136768, 0, 19344, 2863136768, 0, 19344, 2863136768, 0, 19348, 2863136768, 0, 19348, 2863136768, 0, 19348, 2863136768, 0, 19348, 2863136768, 0, 19348, 2863136768, 0, 19348, 2863136768, 0, 19348, 2863136768, 0, 19352, 2863136768, 0, 19352, 2863136768, 0, 19352, 2863136768, 0, 19352, 2863136768, 0, 19352, 2863136768, 0, 19352, 2863136768, 0, 19352, 2863136768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756408051402332430_845_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756408051402332430_845_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..68ae184b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756408051402332430_845_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if ((WaveGetLaneIndex() == 30)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756408052154104216_846_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756408052154104216_846_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8588d61e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756408052154104216_846_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,178 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 16))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 6))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((212 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + if ((i0 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 204 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 7424, 67108866, 0, 7424, 67108866, 0, 7440, 67108866, 0, 7440, 67108866, 0, 9024, 142608385, 0, 9024, 142608385, 0, 9024, 142608385, 0, 9024, 142608385, 0, 9040, 142608385, 0, 9040, 142608385, 0, 9040, 142608385, 0, 9040, 142608385, 0, 
9664, 85, 0, 9664, 85, 0, 9664, 85, 0, 9664, 85, 0, 9680, 85, 0, 9680, 85, 0, 9680, 85, 0, 9680, 85, 0, 14016, 2860515328, 0, 14016, 2860515328, 0, 14016, 2860515328, 0, 14016, 2860515328, 0, 14016, 2860515328, 0, 14032, 2860515328, 0, 14032, 2860515328, 0, 14032, 2860515328, 0, 14032, 2860515328, 0, 14032, 2860515328, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 7424, 67108866, 0, 7424, 67108866, 0, 7440, 67108866, 0, 7440, 67108866, 0, 9024, 142608385, 0, 9024, 142608385, 0, 9024, 142608385, 0, 9024, 142608385, 0, 9040, 142608385, 0, 9040, 142608385, 0, 9040, 142608385, 0, 9040, 142608385, 0, 9664, 85, 0, 9664, 85, 0, 9664, 85, 0, 9664, 85, 0, 9680, 85, 0, 9680, 85, 0, 9680, 85, 0, 9680, 85, 0, 14016, 2860515328, 0, 14016, 2860515328, 0, 14016, 2860515328, 0, 14016, 2860515328, 0, 14016, 2860515328, 0, 14032, 2860515328, 0, 14032, 2860515328, 0, 14032, 2860515328, 0, 14032, 2860515328, 0, 14032, 2860515328, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756408075124782776_847_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756408075124782776_847_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f2c86bd7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756408075124782776_847_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,365 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 22))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 
1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((168 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((175 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) 
| (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((230 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((237 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((287 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((297 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((306 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((310 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((i5 == 1)) { + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 426 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 1073741825, 0, 1600, 1073741825, 0, 5312, 272696336, 0, 5312, 272696336, 0, 5312, 272696336, 0, 5312, 272696336, 0, 5312, 272696336, 0, 5632, 68174084, 0, 5632, 68174084, 0, 5632, 68174084, 0, 5632, 68174084, 0, 5632, 68174084, 0, 7488, 85, 0, 7488, 85, 0, 7488, 85, 0, 7488, 85, 0, 10772, 16777216, 0, 10776, 16777216, 0, 10788, 16777216, 0, 10792, 16777216, 0, 10804, 16777216, 0, 10808, 16777216, 0, 11220, 1, 0, 11224, 1, 0, 11236, 1, 0, 11240, 1, 0, 11252, 1, 0, 11256, 1, 0, 13248, 268501008, 0, 13248, 268501008, 0, 13248, 268501008, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 18384, 2290649224, 0, 18384, 2290649224, 0, 18384, 2290649224, 0, 18384, 2290649224, 0, 18384, 2290649224, 0, 18384, 2290649224, 0, 18384, 2290649224, 0, 18384, 2290649224, 0, 18388, 2290649224, 0, 18388, 2290649224, 0, 18388, 2290649224, 0, 18388, 2290649224, 0, 18388, 2290649224, 0, 18388, 2290649224, 0, 18388, 2290649224, 0, 18388, 2290649224, 0, 18400, 2290649224, 0, 18400, 2290649224, 0, 18400, 2290649224, 0, 18400, 2290649224, 0, 18400, 2290649224, 0, 18400, 2290649224, 0, 18400, 2290649224, 0, 18400, 2290649224, 0, 18404, 2290649224, 0, 18404, 2290649224, 0, 18404, 2290649224, 0, 18404, 2290649224, 0, 18404, 2290649224, 0, 18404, 2290649224, 0, 18404, 2290649224, 0, 18404, 2290649224, 0, 1600, 1073741825, 0, 1600, 1073741825, 0, 5312, 272696336, 0, 5312, 272696336, 0, 5312, 272696336, 0, 5312, 272696336, 0, 5312, 272696336, 0, 
5632, 68174084, 0, 5632, 68174084, 0, 5632, 68174084, 0, 5632, 68174084, 0, 5632, 68174084, 0, 7488, 85, 0, 7488, 85, 0, 7488, 85, 0, 7488, 85, 0, 10772, 16777216, 0, 10776, 16777216, 0, 10788, 16777216, 0, 10792, 16777216, 0, 10804, 16777216, 0, 10808, 16777216, 0, 11220, 1, 0, 11224, 1, 0, 11236, 1, 0, 11240, 1, 0, 11252, 1, 0, 11256, 1, 0, 13248, 268501008, 0, 13248, 268501008, 0, 13248, 268501008, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 16256, 1145324612, 0, 18384, 2290649224, 0, 18384, 2290649224, 0, 18384, 2290649224, 0, 18384, 2290649224, 0, 18384, 2290649224, 0, 18384, 2290649224, 0, 18384, 2290649224, 0, 18384, 2290649224, 0, 18388, 2290649224, 0, 18388, 2290649224, 0, 18388, 2290649224, 0, 18388, 2290649224, 0, 18388, 2290649224, 0, 18388, 2290649224, 0, 18388, 2290649224, 0, 18388, 2290649224, 0, 18400, 2290649224, 0, 18400, 2290649224, 0, 18400, 2290649224, 0, 18400, 2290649224, 0, 18400, 2290649224, 0, 18400, 2290649224, 0, 18400, 2290649224, 0, 18400, 2290649224, 0, 18404, 2290649224, 0, 18404, 2290649224, 0, 18404, 2290649224, 0, 18404, 2290649224, 0, 18404, 2290649224, 0, 18404, 2290649224, 0, 18404, 2290649224, 0, 18404, 2290649224, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756408140237345551_848_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756408140237345551_848_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..37ba89f1 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756408140237345551_848_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,280 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 
7))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 24)) { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 3))) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 27)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((218 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((225 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((234 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((241 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17)) || 
(WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 390 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 136348168, 0, 1024, 136348168, 0, 1024, 136348168, 0, 1024, 136348168, 0, 1024, 136348168, 0, 1664, 8, 0, 2240, 136348168, 0, 2240, 136348168, 0, 2240, 136348168, 0, 2240, 136348168, 0, 2240, 136348168, 0, 3712, 1, 0, 4672, 1090519040, 0, 4672, 1090519040, 0, 4688, 1090519040, 0, 4688, 1090519040, 0, 5120, 1073741824, 0, 5136, 1073741824, 0, 6208, 1, 0, 8064, 272696336, 0, 8064, 272696336, 0, 8064, 272696336, 0, 8064, 272696336, 0, 8064, 272696336, 0, 8704, 613566756, 0, 8704, 613566756, 0, 8704, 613566756, 0, 8704, 613566756, 0, 8704, 613566756, 0, 8704, 613566756, 0, 8704, 613566756, 0, 8704, 613566756, 0, 8704, 613566756, 0, 8704, 613566756, 0, 11648, 4194304, 0, 12864, 1073741834, 0, 12864, 1073741834, 0, 12864, 1073741834, 0, 12880, 1073741834, 0, 12880, 1073741834, 0, 12880, 1073741834, 0, 14980, 1077936384, 0, 14980, 1077936384, 0, 14980, 1077936384, 0, 14984, 1077936384, 0, 14984, 1077936384, 0, 14984, 1077936384, 0, 14988, 1077936384, 0, 14988, 1077936384, 0, 14988, 1077936384, 0, 14996, 1077936384, 0, 14996, 1077936384, 0, 14996, 1077936384, 0, 15000, 1077936384, 0, 15000, 1077936384, 0, 15000, 1077936384, 0, 15004, 1077936384, 0, 15004, 1077936384, 0, 15004, 1077936384, 0, 15428, 4194304, 0, 15432, 4194304, 0, 15436, 4194304, 0, 15444, 4194304, 0, 15448, 4194304, 0, 15452, 4194304, 0, 1024, 136348168, 0, 1024, 136348168, 0, 1024, 
136348168, 0, 1024, 136348168, 0, 1024, 136348168, 0, 1664, 8, 0, 2240, 136348168, 0, 2240, 136348168, 0, 2240, 136348168, 0, 2240, 136348168, 0, 2240, 136348168, 0, 3712, 1, 0, 4672, 1090519040, 0, 4672, 1090519040, 0, 4688, 1090519040, 0, 4688, 1090519040, 0, 5120, 1073741824, 0, 5136, 1073741824, 0, 6208, 1, 0, 8064, 272696336, 0, 8064, 272696336, 0, 8064, 272696336, 0, 8064, 272696336, 0, 8064, 272696336, 0, 8704, 613566756, 0, 8704, 613566756, 0, 8704, 613566756, 0, 8704, 613566756, 0, 8704, 613566756, 0, 8704, 613566756, 0, 8704, 613566756, 0, 8704, 613566756, 0, 8704, 613566756, 0, 8704, 613566756, 0, 11648, 4194304, 0, 12864, 1073741834, 0, 12864, 1073741834, 0, 12864, 1073741834, 0, 12880, 1073741834, 0, 12880, 1073741834, 0, 12880, 1073741834, 0, 14980, 1077936384, 0, 14980, 1077936384, 0, 14980, 1077936384, 0, 14984, 1077936384, 0, 14984, 1077936384, 0, 14984, 1077936384, 0, 14988, 1077936384, 0, 14988, 1077936384, 0, 14988, 1077936384, 0, 14996, 1077936384, 0, 14996, 1077936384, 0, 14996, 1077936384, 0, 15000, 1077936384, 0, 15000, 1077936384, 0, 15000, 1077936384, 0, 15004, 1077936384, 0, 15004, 1077936384, 0, 15004, 1077936384, 0, 15428, 4194304, 0, 15432, 4194304, 0, 15436, 4194304, 0, 15444, 4194304, 0, 15448, 4194304, 0, 15452, 4194304, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756408156005779480_849_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756408156005779480_849_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f088f50c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756408156005779480_849_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,198 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 17)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 18)) || 
(WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 1090785345, 0, 1344, 1090785345, 0, 1344, 1090785345, 0, 1344, 1090785345, 0, 1344, 1090785345, 0, 1344, 1090785345, 0, 1360, 1090785345, 0, 1360, 1090785345, 0, 1360, 1090785345, 0, 1360, 1090785345, 0, 1360, 1090785345, 0, 1360, 1090785345, 0, 2432, 1, 0, 2448, 
1, 0, 2880, 9, 0, 2880, 9, 0, 2896, 9, 0, 2896, 9, 0, 3456, 136348168, 0, 3456, 136348168, 0, 3456, 136348168, 0, 3456, 136348168, 0, 3456, 136348168, 0, 3472, 136348168, 0, 3472, 136348168, 0, 3472, 136348168, 0, 3472, 136348168, 0, 3472, 136348168, 0, 4032, 272696336, 0, 4032, 272696336, 0, 4032, 272696336, 0, 4032, 272696336, 0, 4032, 272696336, 0, 10496, 613566756, 0, 10496, 613566756, 0, 10496, 613566756, 0, 10496, 613566756, 0, 10496, 613566756, 0, 10496, 613566756, 0, 10496, 613566756, 0, 10496, 613566756, 0, 10496, 613566756, 0, 10496, 613566756, 0, 1344, 1090785345, 0, 1344, 1090785345, 0, 1344, 1090785345, 0, 1344, 1090785345, 0, 1344, 1090785345, 0, 1344, 1090785345, 0, 1360, 1090785345, 0, 1360, 1090785345, 0, 1360, 1090785345, 0, 1360, 1090785345, 0, 1360, 1090785345, 0, 1360, 1090785345, 0, 2432, 1, 0, 2448, 1, 0, 2880, 9, 0, 2880, 9, 0, 2896, 9, 0, 2896, 9, 0, 3456, 136348168, 0, 3456, 136348168, 0, 3456, 136348168, 0, 3456, 136348168, 0, 3456, 136348168, 0, 3472, 136348168, 0, 3472, 136348168, 0, 3472, 136348168, 0, 3472, 136348168, 0, 3472, 136348168, 0, 4032, 272696336, 0, 4032, 272696336, 0, 4032, 272696336, 0, 4032, 272696336, 0, 4032, 272696336, 0, 10496, 613566756, 0, 10496, 613566756, 0, 10496, 613566756, 0, 10496, 613566756, 0, 10496, 613566756, 0, 10496, 613566756, 0, 10496, 613566756, 0, 10496, 613566756, 0, 10496, 613566756, 0, 10496, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756408952147262367_852_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756408952147262367_852_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f6fa122d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756408952147262367_852_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,450 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if 
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 25)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 26)) { + result = 
(result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((125 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 23)) { + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (i2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 15)) { + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 
1); + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((247 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 23))) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (341 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((368 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((391 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (413 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((427 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) 
|| (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((453 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((472 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((479 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (486 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (490 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (501 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 594 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6272, 16, 0, 6288, 16, 0, 6304, 16, 0, 6912, 2449473536, 0, 6912, 2449473536, 0, 6912, 2449473536, 0, 6928, 2449473536, 0, 6928, 2449473536, 0, 6928, 2449473536, 0, 6944, 2449473536, 0, 6944, 2449473536, 0, 6944, 2449473536, 0, 8000, 2415919104, 0, 8000, 2415919104, 0, 8004, 2415919104, 0, 8004, 2415919104, 0, 8008, 2415919104, 0, 8008, 2415919104, 0, 8016, 2415919104, 0, 8016, 2415919104, 0, 8020, 2415919104, 0, 8020, 2415919104, 0, 8024, 2415919104, 0, 8024, 2415919104, 0, 8032, 2415919104, 0, 8032, 2415919104, 0, 8036, 2415919104, 0, 8036, 2415919104, 0, 8040, 2415919104, 0, 8040, 2415919104, 0, 8640, 2415919104, 0, 8640, 2415919104, 0, 8656, 2415919104, 0, 8656, 2415919104, 0, 8672, 2415919104, 0, 8672, 2415919104, 0, 9344, 18, 0, 9344, 18, 0, 9360, 18, 0, 9360, 18, 0, 9376, 18, 0, 9376, 18, 0, 11264, 128, 0, 11280, 128, 0, 11296, 128, 0, 11776, 613566756, 0, 11776, 613566756, 0, 11776, 613566756, 0, 11776, 613566756, 0, 11776, 613566756, 0, 11776, 613566756, 0, 11776, 613566756, 0, 11776, 613566756, 0, 11776, 613566756, 0, 11776, 613566756, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 
1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 23552, 33554432, 0, 23568, 33554432, 0, 26432, 2097152, 0, 30224, 536879104, 0, 30224, 536879104, 0, 30240, 536879104, 0, 30240, 536879104, 0, 31104, 559240, 0, 31104, 559240, 0, 31104, 559240, 0, 31104, 559240, 0, 31104, 559240, 0, 6272, 16, 0, 6288, 16, 0, 6304, 16, 0, 6912, 2449473536, 0, 6912, 2449473536, 0, 6912, 2449473536, 0, 6928, 2449473536, 0, 6928, 2449473536, 0, 6928, 2449473536, 0, 6944, 2449473536, 0, 6944, 2449473536, 0, 6944, 2449473536, 0, 8000, 2415919104, 0, 8000, 2415919104, 0, 8004, 2415919104, 0, 8004, 2415919104, 0, 8008, 2415919104, 0, 8008, 2415919104, 0, 8016, 2415919104, 0, 8016, 2415919104, 0, 8020, 2415919104, 0, 8020, 2415919104, 0, 8024, 2415919104, 0, 8024, 2415919104, 0, 8032, 2415919104, 0, 8032, 2415919104, 0, 8036, 2415919104, 0, 8036, 2415919104, 0, 8040, 2415919104, 0, 8040, 2415919104, 0, 8640, 2415919104, 0, 8640, 2415919104, 0, 8656, 2415919104, 0, 8656, 2415919104, 0, 8672, 2415919104, 0, 8672, 2415919104, 0, 9344, 18, 0, 9344, 18, 0, 9360, 18, 0, 9360, 18, 0, 9376, 18, 0, 9376, 18, 0, 11264, 128, 0, 11280, 128, 0, 11296, 128, 0, 11776, 613566756, 0, 11776, 613566756, 0, 11776, 613566756, 0, 11776, 613566756, 0, 11776, 613566756, 0, 11776, 613566756, 0, 11776, 613566756, 0, 11776, 613566756, 0, 11776, 613566756, 0, 11776, 613566756, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 12672, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 
13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 13248, 1431655765, 0, 23552, 33554432, 0, 23568, 33554432, 0, 26432, 2097152, 0, 30224, 536879104, 0, 30224, 536879104, 0, 30240, 536879104, 0, 30240, 536879104, 0, 31104, 559240, 0, 31104, 559240, 0, 31104, 559240, 0, 31104, 559240, 0, 31104, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756409253767263905_854_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756409253767263905_854_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..474a696e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756409253767263905_854_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,101 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((30 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 18)) { + 
result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 900 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1924, 3758096384, 0, 1924, 3758096384, 0, 1924, 3758096384, 0, 1928, 3758096384, 0, 1928, 3758096384, 0, 1928, 3758096384, 0, 1940, 3758096384, 0, 1940, 3758096384, 0, 1940, 3758096384, 0, 1944, 3758096384, 0, 1944, 3758096384, 0, 1944, 3758096384, 0, 1956, 3758096384, 0, 1956, 3758096384, 0, 1956, 3758096384, 0, 1960, 3758096384, 0, 1960, 3758096384, 0, 1960, 3758096384, 
0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 
1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1056, 2863311530, 0, 1924, 3758096384, 0, 1924, 3758096384, 0, 1924, 3758096384, 0, 1928, 3758096384, 0, 1928, 3758096384, 0, 1928, 3758096384, 0, 1940, 3758096384, 0, 1940, 3758096384, 0, 1940, 3758096384, 0, 1944, 3758096384, 0, 1944, 3758096384, 0, 1944, 3758096384, 0, 1956, 3758096384, 0, 1956, 3758096384, 0, 1956, 3758096384, 0, 1960, 3758096384, 0, 1960, 3758096384, 0, 1960, 3758096384, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3012, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3016, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 3028, 4294705152, 0, 
3028, 4294705152, 0, 3028, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3032, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3044, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0, 3048, 4294705152, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756409320166735638_856_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756409320166735638_856_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1f598254 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756409320166735638_856_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,158 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((32 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 
10) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 378 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2688, 17, 0, 2688, 17, 0, 2692, 17, 0, 2692, 17, 0, 2696, 17, 0, 2696, 17, 0, 2704, 17, 0, 2704, 17, 0, 2708, 17, 0, 2708, 17, 0, 2712, 17, 0, 2712, 17, 0, 2720, 17, 0, 2720, 17, 0, 2724, 17, 0, 2724, 17, 0, 2728, 17, 0, 2728, 17, 0, 3520, 285212928, 0, 3520, 285212928, 0, 3520, 285212928, 0, 3524, 285212928, 0, 3524, 285212928, 0, 3524, 285212928, 0, 3528, 285212928, 0, 3528, 285212928, 0, 3528, 285212928, 0, 3536, 285212928, 0, 3536, 285212928, 0, 3536, 285212928, 0, 3540, 285212928, 0, 3540, 285212928, 0, 3540, 285212928, 0, 3544, 285212928, 0, 3544, 285212928, 0, 3544, 285212928, 0, 3552, 285212928, 0, 3552, 285212928, 0, 3552, 285212928, 0, 3556, 285212928, 0, 3556, 285212928, 0, 3556, 285212928, 0, 3560, 285212928, 0, 3560, 285212928, 0, 3560, 285212928, 0, 5312, 1145324612, 0, 5312, 1145324612, 0, 5312, 1145324612, 0, 5312, 1145324612, 0, 5312, 1145324612, 0, 5312, 1145324612, 0, 5312, 1145324612, 0, 5312, 1145324612, 0, 5760, 838860, 0, 5760, 838860, 0, 5760, 838860, 0, 5760, 838860, 0, 5760, 838860, 0, 5760, 838860, 0, 5760, 838860, 0, 5760, 838860, 0, 5760, 838860, 0, 5760, 838860, 0, 2688, 17, 0, 2688, 17, 0, 2692, 17, 0, 2692, 17, 0, 2696, 17, 0, 2696, 17, 0, 2704, 17, 0, 2704, 17, 0, 2708, 17, 0, 2708, 17, 0, 2712, 17, 0, 2712, 17, 0, 2720, 17, 0, 2720, 17, 0, 2724, 17, 0, 2724, 17, 0, 2728, 17, 0, 2728, 17, 0, 3520, 285212928, 0, 3520, 285212928, 0, 3520, 285212928, 0, 3524, 285212928, 0, 3524, 285212928, 0, 3524, 285212928, 0, 3528, 285212928, 0, 3528, 285212928, 0, 3528, 285212928, 0, 3536, 285212928, 0, 3536, 285212928, 0, 3536, 285212928, 0, 3540, 285212928, 0, 3540, 285212928, 0, 3540, 285212928, 0, 3544, 285212928, 0, 3544, 285212928, 0, 3544, 
285212928, 0, 3552, 285212928, 0, 3552, 285212928, 0, 3552, 285212928, 0, 3556, 285212928, 0, 3556, 285212928, 0, 3556, 285212928, 0, 3560, 285212928, 0, 3560, 285212928, 0, 3560, 285212928, 0, 5312, 1145324612, 0, 5312, 1145324612, 0, 5312, 1145324612, 0, 5312, 1145324612, 0, 5312, 1145324612, 0, 5312, 1145324612, 0, 5312, 1145324612, 0, 5312, 1145324612, 0, 5760, 838860, 0, 5760, 838860, 0, 5760, 838860, 0, 5760, 838860, 0, 5760, 838860, 0, 5760, 838860, 0, 5760, 838860, 0, 5760, 838860, 0, 5760, 838860, 0, 5760, 838860, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756409348010917437_857_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756409348010917437_857_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b4d53b42 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756409348010917437_857_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,174 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 25))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 28))) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 6528, 559240, 0, 6528, 559240, 0, 6528, 559240, 0, 6528, 559240, 0, 6528, 559240, 0, 576, 17, 0, 576, 17, 0, 6528, 559240, 0, 6528, 559240, 0, 6528, 559240, 0, 6528, 559240, 0, 6528, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756409348744140025_858_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756409348744140025_858_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..25fe4b35 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756409348744140025_858_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,127 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 174 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 4160, 8192, 0, 3776, 1073873416, 0, 3776, 1073873416, 0, 3776, 1073873416, 0, 3776, 1073873416, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 
1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 2368, 1431655765, 0, 4160, 8192, 0, 3776, 1073873416, 0, 3776, 1073873416, 0, 3776, 1073873416, 0, 3776, 1073873416, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756409680829170309_860_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756409680829170309_860_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..da4f589f --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756409680829170309_860_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,377 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 25))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((161 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((178 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((187 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((192 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((196 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((209 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 18)) { + result = (result + 
WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((308 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 840 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2112, 262144, 0, 2128, 262144, 0, 2144, 262144, 0, 3264, 1073741828, 0, 3264, 1073741828, 0, 3280, 1073741828, 0, 3280, 1073741828, 0, 3296, 1073741828, 0, 3296, 1073741828, 0, 3968, 1073741824, 0, 3984, 1073741824, 0, 4000, 1073741824, 0, 5184, 
262144, 0, 5200, 262144, 0, 5216, 262144, 0, 6400, 1024, 0, 6416, 1024, 0, 6432, 1024, 0, 7616, 559240, 0, 7616, 559240, 0, 7616, 559240, 0, 7616, 559240, 0, 7616, 559240, 0, 8256, 17, 0, 8256, 17, 0, 11396, 64, 0, 11400, 64, 0, 11404, 64, 0, 11412, 64, 0, 11416, 64, 0, 11420, 64, 0, 11428, 64, 0, 11432, 64, 0, 11436, 64, 0, 11972, 4195328, 0, 11972, 4195328, 0, 11976, 4195328, 0, 11976, 4195328, 0, 11980, 4195328, 0, 11980, 4195328, 0, 11988, 4195328, 0, 11988, 4195328, 0, 11992, 4195328, 0, 11992, 4195328, 0, 11996, 4195328, 0, 11996, 4195328, 0, 12004, 4195328, 0, 12004, 4195328, 0, 12008, 4195328, 0, 12008, 4195328, 0, 12012, 4195328, 0, 12012, 4195328, 0, 12292, 67125252, 0, 12292, 67125252, 0, 12292, 67125252, 0, 12296, 67125252, 0, 12296, 67125252, 0, 12296, 67125252, 0, 12300, 67125252, 0, 12300, 67125252, 0, 12300, 67125252, 0, 12308, 67125252, 0, 12308, 67125252, 0, 12308, 67125252, 0, 12312, 67125252, 0, 12312, 67125252, 0, 12312, 67125252, 0, 12316, 67125252, 0, 12316, 67125252, 0, 12316, 67125252, 0, 12324, 67125252, 0, 12324, 67125252, 0, 12324, 67125252, 0, 12328, 67125252, 0, 12328, 67125252, 0, 12328, 67125252, 0, 12332, 67125252, 0, 12332, 67125252, 0, 12332, 67125252, 0, 13380, 1145044996, 0, 13380, 1145044996, 0, 13380, 1145044996, 0, 13380, 1145044996, 0, 13384, 1145044996, 0, 13384, 1145044996, 0, 13384, 1145044996, 0, 13384, 1145044996, 0, 13388, 1145044996, 0, 13388, 1145044996, 0, 13388, 1145044996, 0, 13388, 1145044996, 0, 13396, 1145044996, 0, 13396, 1145044996, 0, 13396, 1145044996, 0, 13396, 1145044996, 0, 13400, 1145044996, 0, 13400, 1145044996, 0, 13400, 1145044996, 0, 13400, 1145044996, 0, 13404, 1145044996, 0, 13404, 1145044996, 0, 13404, 1145044996, 0, 13404, 1145044996, 0, 13412, 1145044996, 0, 13412, 1145044996, 0, 13412, 1145044996, 0, 13412, 1145044996, 0, 13416, 1145044996, 0, 13416, 1145044996, 0, 13416, 1145044996, 0, 13416, 1145044996, 0, 13420, 1145044996, 0, 13420, 1145044996, 0, 13420, 1145044996, 0, 13420, 1145044996, 
0, 13824, 559240, 0, 13824, 559240, 0, 13824, 559240, 0, 13824, 559240, 0, 13824, 559240, 0, 14912, 73, 0, 14912, 73, 0, 14912, 73, 0, 18240, 272696336, 0, 18240, 272696336, 0, 18240, 272696336, 0, 18240, 272696336, 0, 18240, 272696336, 0, 19728, 545392672, 0, 19728, 545392672, 0, 19728, 545392672, 0, 19728, 545392672, 0, 19728, 545392672, 0, 19744, 545392672, 0, 19744, 545392672, 0, 19744, 545392672, 0, 19744, 545392672, 0, 19744, 545392672, 0, 576, 17, 0, 576, 17, 0, 2112, 262144, 0, 2128, 262144, 0, 2144, 262144, 0, 3264, 1073741828, 0, 3264, 1073741828, 0, 3280, 1073741828, 0, 3280, 1073741828, 0, 3296, 1073741828, 0, 3296, 1073741828, 0, 3968, 1073741824, 0, 3984, 1073741824, 0, 4000, 1073741824, 0, 5184, 262144, 0, 5200, 262144, 0, 5216, 262144, 0, 6400, 1024, 0, 6416, 1024, 0, 6432, 1024, 0, 7616, 559240, 0, 7616, 559240, 0, 7616, 559240, 0, 7616, 559240, 0, 7616, 559240, 0, 8256, 17, 0, 8256, 17, 0, 11396, 64, 0, 11400, 64, 0, 11404, 64, 0, 11412, 64, 0, 11416, 64, 0, 11420, 64, 0, 11428, 64, 0, 11432, 64, 0, 11436, 64, 0, 11972, 4195328, 0, 11972, 4195328, 0, 11976, 4195328, 0, 11976, 4195328, 0, 11980, 4195328, 0, 11980, 4195328, 0, 11988, 4195328, 0, 11988, 4195328, 0, 11992, 4195328, 0, 11992, 4195328, 0, 11996, 4195328, 0, 11996, 4195328, 0, 12004, 4195328, 0, 12004, 4195328, 0, 12008, 4195328, 0, 12008, 4195328, 0, 12012, 4195328, 0, 12012, 4195328, 0, 12292, 67125252, 0, 12292, 67125252, 0, 12292, 67125252, 0, 12296, 67125252, 0, 12296, 67125252, 0, 12296, 67125252, 0, 12300, 67125252, 0, 12300, 67125252, 0, 12300, 67125252, 0, 12308, 67125252, 0, 12308, 67125252, 0, 12308, 67125252, 0, 12312, 67125252, 0, 12312, 67125252, 0, 12312, 67125252, 0, 12316, 67125252, 0, 12316, 67125252, 0, 12316, 67125252, 0, 12324, 67125252, 0, 12324, 67125252, 0, 12324, 67125252, 0, 12328, 67125252, 0, 12328, 67125252, 0, 12328, 67125252, 0, 12332, 67125252, 0, 12332, 67125252, 0, 12332, 67125252, 0, 13380, 1145044996, 0, 13380, 1145044996, 0, 13380, 1145044996, 0, 
13380, 1145044996, 0, 13384, 1145044996, 0, 13384, 1145044996, 0, 13384, 1145044996, 0, 13384, 1145044996, 0, 13388, 1145044996, 0, 13388, 1145044996, 0, 13388, 1145044996, 0, 13388, 1145044996, 0, 13396, 1145044996, 0, 13396, 1145044996, 0, 13396, 1145044996, 0, 13396, 1145044996, 0, 13400, 1145044996, 0, 13400, 1145044996, 0, 13400, 1145044996, 0, 13400, 1145044996, 0, 13404, 1145044996, 0, 13404, 1145044996, 0, 13404, 1145044996, 0, 13404, 1145044996, 0, 13412, 1145044996, 0, 13412, 1145044996, 0, 13412, 1145044996, 0, 13412, 1145044996, 0, 13416, 1145044996, 0, 13416, 1145044996, 0, 13416, 1145044996, 0, 13416, 1145044996, 0, 13420, 1145044996, 0, 13420, 1145044996, 0, 13420, 1145044996, 0, 13420, 1145044996, 0, 13824, 559240, 0, 13824, 559240, 0, 13824, 559240, 0, 13824, 559240, 0, 13824, 559240, 0, 14912, 73, 0, 14912, 73, 0, 14912, 73, 0, 18240, 272696336, 0, 18240, 272696336, 0, 18240, 272696336, 0, 18240, 272696336, 0, 18240, 272696336, 0, 19728, 545392672, 0, 19728, 545392672, 0, 19728, 545392672, 0, 19728, 545392672, 0, 19728, 545392672, 0, 19744, 545392672, 0, 19744, 545392672, 0, 19744, 545392672, 0, 19744, 545392672, 0, 19744, 545392672, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756409804795533124_863_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756409804795533124_863_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2ed98426 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756409804795533124_863_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2304, 134381576, 0, 2304, 134381576, 0, 2304, 134381576, 0, 2304, 134381576, 0, 2048, 33554688, 0, 2048, 33554688, 0, 1792, 4026531840, 0, 1792, 4026531840, 0, 1792, 4026531840, 0, 1792, 4026531840, 0, 2304, 134381576, 0, 2304, 134381576, 0, 2304, 134381576, 0, 2304, 134381576, 0, 2048, 33554688, 0, 2048, 33554688, 0, 1792, 4026531840, 0, 1792, 4026531840, 0, 1792, 4026531840, 0, 1792, 4026531840, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756409805051607567_864_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756409805051607567_864_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cd22d2a2 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756409805051607567_864_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,128 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 5))) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((73 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((84 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((97 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4676, 2099232, 0, 4676, 2099232, 0, 4676, 2099232, 0, 4680, 2099232, 0, 4680, 2099232, 0, 4680, 2099232, 0, 4692, 2099232, 0, 4692, 2099232, 0, 4692, 2099232, 0, 4696, 2099232, 0, 4696, 2099232, 0, 4696, 2099232, 0, 4708, 2099232, 0, 4708, 2099232, 0, 4708, 2099232, 0, 4712, 2099232, 0, 4712, 2099232, 0, 4712, 2099232, 0, 
4676, 2099232, 0, 4676, 2099232, 0, 4676, 2099232, 0, 4680, 2099232, 0, 4680, 2099232, 0, 4680, 2099232, 0, 4692, 2099232, 0, 4692, 2099232, 0, 4692, 2099232, 0, 4696, 2099232, 0, 4696, 2099232, 0, 4696, 2099232, 0, 4708, 2099232, 0, 4708, 2099232, 0, 4708, 2099232, 0, 4712, 2099232, 0, 4712, 2099232, 0, 4712, 2099232, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756409805755296658_865_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756409805755296658_865_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7011c8db --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756409805755296658_865_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,325 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 
0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((85 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 
8)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((156 << 6) | (counter0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((211 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 6)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (286 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (290 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 486 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 8272, 32, 0, 8288, 32, 0, 8304, 32, 0, 11472, 536871424, 0, 11472, 536871424, 0, 11488, 536871424, 0, 11488, 536871424, 0, 11504, 536871424, 0, 11504, 536871424, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 13504, 2113538, 0, 13504, 2113538, 0, 13504, 2113538, 0, 13520, 2113538, 0, 13520, 2113538, 0, 13520, 2113538, 0, 13536, 2113538, 0, 13536, 2113538, 0, 13536, 2113538, 0, 15376, 1, 0, 15392, 1, 0, 15408, 1, 0, 16704, 8, 0, 17408, 9, 0, 17408, 9, 0, 17984, 272696336, 0, 17984, 272696336, 0, 17984, 272696336, 0, 17984, 272696336, 0, 17984, 272696336, 0, 18304, 613566756, 0, 18304, 613566756, 0, 18304, 613566756, 0, 18304, 613566756, 0, 18304, 613566756, 0, 18304, 613566756, 0, 18304, 613566756, 0, 18304, 613566756, 0, 18304, 613566756, 0, 18304, 613566756, 0, 19200, 73, 0, 19200, 73, 0, 19200, 73, 0, 19776, 1363481681, 0, 19776, 1363481681, 0, 19776, 1363481681, 0, 19776, 1363481681, 0, 19776, 1363481681, 0, 19776, 1363481681, 0, 19776, 1363481681, 0, 19776, 1363481681, 0, 
19776, 1363481681, 0, 19776, 1363481681, 0, 19776, 1363481681, 0, 20096, 613566756, 0, 20096, 613566756, 0, 20096, 613566756, 0, 20096, 613566756, 0, 20096, 613566756, 0, 20096, 613566756, 0, 20096, 613566756, 0, 20096, 613566756, 0, 20096, 613566756, 0, 20096, 613566756, 0, 576, 17, 0, 576, 17, 0, 8272, 32, 0, 8288, 32, 0, 8304, 32, 0, 11472, 536871424, 0, 11472, 536871424, 0, 11488, 536871424, 0, 11488, 536871424, 0, 11504, 536871424, 0, 11504, 536871424, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 11776, 1717986918, 0, 13504, 2113538, 0, 13504, 2113538, 0, 13504, 2113538, 0, 13520, 2113538, 0, 13520, 2113538, 0, 13520, 2113538, 0, 13536, 2113538, 0, 13536, 2113538, 0, 13536, 2113538, 0, 15376, 1, 0, 15392, 1, 0, 15408, 1, 0, 16704, 8, 0, 17408, 9, 0, 17408, 9, 0, 17984, 272696336, 0, 17984, 272696336, 0, 17984, 272696336, 0, 17984, 272696336, 0, 17984, 272696336, 0, 18304, 613566756, 0, 18304, 613566756, 0, 18304, 613566756, 0, 18304, 613566756, 0, 18304, 613566756, 0, 18304, 613566756, 0, 18304, 613566756, 0, 18304, 613566756, 0, 18304, 613566756, 0, 18304, 613566756, 0, 19200, 73, 0, 19200, 73, 0, 19200, 73, 0, 19776, 1363481681, 0, 19776, 1363481681, 0, 19776, 1363481681, 0, 19776, 1363481681, 0, 19776, 1363481681, 0, 19776, 1363481681, 0, 19776, 1363481681, 0, 19776, 1363481681, 0, 19776, 1363481681, 0, 19776, 1363481681, 0, 19776, 1363481681, 0, 20096, 613566756, 0, 20096, 613566756, 0, 20096, 613566756, 0, 20096, 613566756, 0, 20096, 613566756, 0, 20096, 613566756, 0, 20096, 613566756, 0, 20096, 613566756, 0, 20096, 613566756, 0, 20096, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756409977069531383_868_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756409977069531383_868_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dac7e2ee --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756409977069531383_868_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,113 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 
<< 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 330 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2064, 16809984, 0, 2064, 16809984, 0, 2080, 16809984, 0, 2080, 16809984, 0, 3152, 1090785345, 0, 3152, 1090785345, 0, 3152, 1090785345, 0, 3152, 1090785345, 0, 3152, 1090785345, 0, 3152, 1090785345, 0, 3156, 1090785345, 0, 3156, 1090785345, 0, 3156, 1090785345, 0, 3156, 1090785345, 0, 3156, 1090785345, 0, 3156, 1090785345, 0, 3160, 1090785345, 0, 3160, 1090785345, 0, 3160, 1090785345, 0, 3160, 1090785345, 0, 3160, 1090785345, 0, 3160, 1090785345, 0, 3168, 1090785345, 0, 3168, 1090785345, 0, 3168, 1090785345, 0, 3168, 1090785345, 0, 3168, 1090785345, 0, 3168, 1090785345, 0, 3172, 1090785345, 0, 3172, 1090785345, 0, 3172, 1090785345, 0, 3172, 1090785345, 0, 3172, 1090785345, 0, 3172, 1090785345, 0, 3176, 1090785345, 0, 3176, 1090785345, 0, 3176, 1090785345, 0, 3176, 1090785345, 0, 3176, 1090785345, 0, 3176, 1090785345, 0, 3904, 
272696336, 0, 3904, 272696336, 0, 3904, 272696336, 0, 3904, 272696336, 0, 3904, 272696336, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 2064, 16809984, 0, 2064, 16809984, 0, 2080, 16809984, 0, 2080, 16809984, 0, 3152, 1090785345, 0, 3152, 1090785345, 0, 3152, 1090785345, 0, 3152, 1090785345, 0, 3152, 1090785345, 0, 3152, 1090785345, 0, 3156, 1090785345, 0, 3156, 1090785345, 0, 3156, 1090785345, 0, 3156, 1090785345, 0, 3156, 1090785345, 0, 3156, 1090785345, 0, 3160, 1090785345, 0, 3160, 1090785345, 0, 3160, 1090785345, 0, 3160, 1090785345, 0, 3160, 1090785345, 0, 3160, 1090785345, 0, 3168, 1090785345, 0, 3168, 1090785345, 0, 3168, 1090785345, 0, 3168, 1090785345, 0, 3168, 1090785345, 0, 3168, 1090785345, 0, 3172, 1090785345, 0, 3172, 1090785345, 0, 3172, 1090785345, 0, 3172, 1090785345, 0, 3172, 1090785345, 0, 3172, 1090785345, 0, 3176, 1090785345, 0, 3176, 1090785345, 0, 3176, 1090785345, 0, 3176, 1090785345, 0, 3176, 1090785345, 0, 3176, 1090785345, 0, 3904, 272696336, 0, 3904, 272696336, 0, 3904, 272696336, 0, 3904, 272696336, 0, 3904, 272696336, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0, 4224, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756409980043028278_869_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756409980043028278_869_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e4c2cbbb --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756409980043028278_869_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,403 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if 
((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((76 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((183 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 6) || 
(WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 25)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((314 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((323 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 21))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (361 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (376 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (380 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 7))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 660 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2304, 3221225599, 0, 2304, 3221225599, 0, 2304, 3221225599, 0, 2304, 3221225599, 0, 2304, 3221225599, 0, 2304, 3221225599, 0, 2304, 3221225599, 0, 2304, 3221225599, 0, 2304, 3221225599, 0, 4416, 9, 0, 4416, 9, 0, 4420, 9, 0, 4420, 9, 0, 4424, 9, 0, 4424, 9, 0, 4432, 9, 0, 4432, 9, 0, 4436, 9, 0, 4436, 9, 0, 4440, 9, 0, 4440, 9, 0, 4864, 73, 0, 4864, 73, 0, 4864, 73, 0, 4868, 73, 0, 4868, 73, 0, 4868, 73, 0, 4872, 73, 0, 4872, 73, 0, 4872, 73, 0, 4880, 73, 0, 4880, 73, 0, 4880, 73, 0, 4884, 73, 0, 4884, 73, 0, 4884, 73, 0, 4888, 73, 0, 4888, 73, 0, 4888, 73, 0, 6272, 16, 0, 6592, 2147483830, 0, 6592, 2147483830, 0, 6592, 2147483830, 0, 6592, 2147483830, 0, 6592, 2147483830, 0, 6592, 2147483830, 0, 9088, 32768, 0, 9104, 32768, 0, 9120, 32768, 0, 10500, 18874368, 0, 10500, 18874368, 0, 10516, 18874368, 0, 10516, 18874368, 0, 10532, 18874368, 0, 10532, 18874368, 0, 11716, 134217728, 0, 11732, 134217728, 0, 11748, 134217728, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 
14592, 1073741568, 0, 15552, 1056964608, 0, 15552, 1056964608, 0, 15552, 1056964608, 0, 15552, 1056964608, 0, 15552, 1056964608, 0, 15552, 1056964608, 0, 17984, 134742017, 0, 17984, 134742017, 0, 17984, 134742017, 0, 18624, 17, 0, 18624, 17, 0, 20112, 68, 0, 20112, 68, 0, 20128, 68, 0, 20128, 68, 0, 20144, 68, 0, 20144, 68, 0, 20688, 68, 0, 20688, 68, 0, 20704, 68, 0, 20704, 68, 0, 20720, 68, 0, 20720, 68, 0, 25280, 2147483784, 0, 25280, 2147483784, 0, 25280, 2147483784, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2304, 3221225599, 0, 2304, 3221225599, 0, 2304, 3221225599, 0, 2304, 3221225599, 0, 2304, 3221225599, 0, 2304, 3221225599, 0, 2304, 3221225599, 0, 2304, 3221225599, 0, 2304, 3221225599, 0, 4416, 9, 0, 4416, 9, 0, 4420, 9, 0, 4420, 9, 0, 4424, 9, 0, 4424, 9, 0, 4432, 9, 0, 4432, 9, 0, 4436, 9, 0, 4436, 9, 0, 4440, 9, 0, 4440, 9, 0, 4864, 73, 0, 4864, 73, 0, 4864, 73, 0, 4868, 73, 0, 4868, 73, 0, 4868, 73, 0, 4872, 73, 0, 4872, 73, 0, 4872, 73, 0, 4880, 73, 0, 4880, 73, 0, 4880, 73, 0, 4884, 73, 0, 4884, 73, 0, 4884, 73, 0, 4888, 73, 0, 4888, 73, 0, 4888, 73, 0, 6272, 16, 0, 6592, 2147483830, 0, 6592, 2147483830, 0, 6592, 2147483830, 0, 6592, 2147483830, 0, 6592, 2147483830, 0, 6592, 2147483830, 0, 9088, 32768, 0, 9104, 32768, 0, 9120, 32768, 0, 10500, 18874368, 0, 10500, 18874368, 0, 10516, 18874368, 0, 10516, 18874368, 0, 10532, 18874368, 0, 10532, 18874368, 0, 11716, 134217728, 0, 11732, 134217728, 0, 11748, 134217728, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 14592, 1073741568, 0, 15552, 1056964608, 0, 15552, 1056964608, 0, 15552, 
1056964608, 0, 15552, 1056964608, 0, 15552, 1056964608, 0, 15552, 1056964608, 0, 17984, 134742017, 0, 17984, 134742017, 0, 17984, 134742017, 0, 18624, 17, 0, 18624, 17, 0, 20112, 68, 0, 20112, 68, 0, 20128, 68, 0, 20128, 68, 0, 20144, 68, 0, 20144, 68, 0, 20688, 68, 0, 20688, 68, 0, 20704, 68, 0, 20704, 68, 0, 20720, 68, 0, 20720, 68, 0, 25280, 2147483784, 0, 25280, 2147483784, 0, 25280, 2147483784, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756410119300535326_871_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756410119300535326_871_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cda8c721 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756410119300535326_871_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,160 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 31))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29)) || 
(WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 21))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((131 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 17)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((163 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((170 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 174 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2704, 32, 0, 
2720, 32, 0, 2736, 32, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 8848, 4, 0, 8864, 4, 0, 2704, 32, 0, 2720, 32, 0, 2736, 32, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7504, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 7520, 4026532095, 0, 8848, 4, 0, 8864, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756410186640944616_874_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756410186640944616_874_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..74596a53 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756410186640944616_874_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,235 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 18)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((184 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((200 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((209 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 696 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5248, 153092096, 0, 5248, 153092096, 0, 5248, 153092096, 0, 5264, 153092096, 0, 5264, 153092096, 0, 5264, 153092096, 0, 5280, 153092096, 0, 5280, 153092096, 0, 5280, 153092096, 0, 8384, 585, 0, 8384, 585, 0, 8384, 585, 0, 8384, 585, 0, 8400, 585, 0, 8400, 585, 0, 8400, 585, 0, 8400, 585, 0, 8416, 585, 0, 8416, 585, 0, 8416, 585, 0, 8416, 585, 0, 9408, 272696336, 0, 9408, 272696336, 0, 9408, 272696336, 0, 9408, 272696336, 0, 9408, 272696336, 0, 10368, 2340, 0, 10368, 2340, 0, 10368, 2340, 0, 10368, 2340, 0, 10384, 2340, 0, 10384, 2340, 0, 10384, 2340, 0, 10384, 2340, 0, 10400, 2340, 0, 10400, 2340, 0, 10400, 2340, 0, 10400, 2340, 0, 11780, 4, 0, 11784, 4, 0, 11796, 4, 
0, 11800, 4, 0, 11812, 4, 0, 11816, 4, 0, 12804, 68174084, 0, 12804, 68174084, 0, 12804, 68174084, 0, 12804, 68174084, 0, 12804, 68174084, 0, 12808, 68174084, 0, 12808, 68174084, 0, 12808, 68174084, 0, 12808, 68174084, 0, 12808, 68174084, 0, 12820, 68174084, 0, 12820, 68174084, 0, 12820, 68174084, 0, 12820, 68174084, 0, 12820, 68174084, 0, 12824, 68174084, 0, 12824, 68174084, 0, 12824, 68174084, 0, 12824, 68174084, 0, 12824, 68174084, 0, 12836, 68174084, 0, 12836, 68174084, 0, 12836, 68174084, 0, 12836, 68174084, 0, 12836, 68174084, 0, 12840, 68174084, 0, 12840, 68174084, 0, 12840, 68174084, 0, 12840, 68174084, 0, 12840, 68174084, 0, 13380, 545392672, 0, 13380, 545392672, 0, 13380, 545392672, 0, 13380, 545392672, 0, 13380, 545392672, 0, 13384, 545392672, 0, 13384, 545392672, 0, 13384, 545392672, 0, 13384, 545392672, 0, 13384, 545392672, 0, 13396, 545392672, 0, 13396, 545392672, 0, 13396, 545392672, 0, 13396, 545392672, 0, 13396, 545392672, 0, 13400, 545392672, 0, 13400, 545392672, 0, 13400, 545392672, 0, 13400, 545392672, 0, 13400, 545392672, 0, 13412, 545392672, 0, 13412, 545392672, 0, 13412, 545392672, 0, 13412, 545392672, 0, 13412, 545392672, 0, 13416, 545392672, 0, 13416, 545392672, 0, 13416, 545392672, 0, 13416, 545392672, 0, 13416, 545392672, 0, 13824, 613416960, 0, 13824, 613416960, 0, 13824, 613416960, 0, 13824, 613416960, 0, 13840, 613416960, 0, 13840, 613416960, 0, 13840, 613416960, 0, 13840, 613416960, 0, 13856, 613416960, 0, 13856, 613416960, 0, 13856, 613416960, 0, 13856, 613416960, 0, 5248, 153092096, 0, 5248, 153092096, 0, 5248, 153092096, 0, 5264, 153092096, 0, 5264, 153092096, 0, 5264, 153092096, 0, 5280, 153092096, 0, 5280, 153092096, 0, 5280, 153092096, 0, 8384, 585, 0, 8384, 585, 0, 8384, 585, 0, 8384, 585, 0, 8400, 585, 0, 8400, 585, 0, 8400, 585, 0, 8400, 585, 0, 8416, 585, 0, 8416, 585, 0, 8416, 585, 0, 8416, 585, 0, 9408, 272696336, 0, 9408, 272696336, 0, 9408, 272696336, 0, 9408, 272696336, 0, 9408, 272696336, 0, 10368, 2340, 0, 10368, 
2340, 0, 10368, 2340, 0, 10368, 2340, 0, 10384, 2340, 0, 10384, 2340, 0, 10384, 2340, 0, 10384, 2340, 0, 10400, 2340, 0, 10400, 2340, 0, 10400, 2340, 0, 10400, 2340, 0, 11780, 4, 0, 11784, 4, 0, 11796, 4, 0, 11800, 4, 0, 11812, 4, 0, 11816, 4, 0, 12804, 68174084, 0, 12804, 68174084, 0, 12804, 68174084, 0, 12804, 68174084, 0, 12804, 68174084, 0, 12808, 68174084, 0, 12808, 68174084, 0, 12808, 68174084, 0, 12808, 68174084, 0, 12808, 68174084, 0, 12820, 68174084, 0, 12820, 68174084, 0, 12820, 68174084, 0, 12820, 68174084, 0, 12820, 68174084, 0, 12824, 68174084, 0, 12824, 68174084, 0, 12824, 68174084, 0, 12824, 68174084, 0, 12824, 68174084, 0, 12836, 68174084, 0, 12836, 68174084, 0, 12836, 68174084, 0, 12836, 68174084, 0, 12836, 68174084, 0, 12840, 68174084, 0, 12840, 68174084, 0, 12840, 68174084, 0, 12840, 68174084, 0, 12840, 68174084, 0, 13380, 545392672, 0, 13380, 545392672, 0, 13380, 545392672, 0, 13380, 545392672, 0, 13380, 545392672, 0, 13384, 545392672, 0, 13384, 545392672, 0, 13384, 545392672, 0, 13384, 545392672, 0, 13384, 545392672, 0, 13396, 545392672, 0, 13396, 545392672, 0, 13396, 545392672, 0, 13396, 545392672, 0, 13396, 545392672, 0, 13400, 545392672, 0, 13400, 545392672, 0, 13400, 545392672, 0, 13400, 545392672, 0, 13400, 545392672, 0, 13412, 545392672, 0, 13412, 545392672, 0, 13412, 545392672, 0, 13412, 545392672, 0, 13412, 545392672, 0, 13416, 545392672, 0, 13416, 545392672, 0, 13416, 545392672, 0, 13416, 545392672, 0, 13416, 545392672, 0, 13824, 613416960, 0, 13824, 613416960, 0, 13824, 613416960, 0, 13824, 613416960, 0, 13840, 613416960, 0, 13840, 613416960, 0, 13840, 613416960, 0, 13840, 613416960, 0, 13856, 613416960, 0, 13856, 613416960, 0, 13856, 613416960, 0, 13856, 613416960, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756410203619641258_875_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756410203619641258_875_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..78ed86a1 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756410203619641258_875_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,329 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + result = 
(result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 21))) { + if 
(((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((262 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 396 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 
0, 576, 73, 0, 576, 73, 0, 2320, 2147483666, 0, 2320, 2147483666, 0, 2320, 2147483666, 0, 2336, 2147483666, 0, 2336, 2147483666, 0, 2336, 2147483666, 0, 2352, 2147483666, 0, 2352, 2147483666, 0, 2352, 2147483666, 0, 3024, 2147483648, 0, 3040, 2147483648, 0, 3056, 2147483648, 0, 3600, 272696336, 0, 3600, 272696336, 0, 3600, 272696336, 0, 3600, 272696336, 0, 3600, 272696336, 0, 3616, 272696336, 0, 3616, 272696336, 0, 3616, 272696336, 0, 3616, 272696336, 0, 3616, 272696336, 0, 3632, 272696336, 0, 3632, 272696336, 0, 3632, 272696336, 0, 3632, 272696336, 0, 3632, 272696336, 0, 6912, 536870916, 0, 6912, 536870916, 0, 7616, 536870916, 0, 7616, 536870916, 0, 9408, 17, 0, 9408, 17, 0, 10560, 570425378, 0, 10560, 570425378, 0, 10560, 570425378, 0, 10560, 570425378, 0, 12352, 570425344, 0, 12352, 570425344, 0, 14848, 1145324612, 0, 14848, 1145324612, 0, 14848, 1145324612, 0, 14848, 1145324612, 0, 14848, 1145324612, 0, 14848, 1145324612, 0, 14848, 1145324612, 0, 14848, 1145324612, 0, 16788, 2290649224, 0, 16788, 2290649224, 0, 16788, 2290649224, 0, 16788, 2290649224, 0, 16788, 2290649224, 0, 16788, 2290649224, 0, 16788, 2290649224, 0, 16788, 2290649224, 0, 16804, 2290649224, 0, 16804, 2290649224, 0, 16804, 2290649224, 0, 16804, 2290649224, 0, 16804, 2290649224, 0, 16804, 2290649224, 0, 16804, 2290649224, 0, 16804, 2290649224, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 2320, 2147483666, 0, 2320, 2147483666, 0, 2320, 2147483666, 0, 2336, 2147483666, 0, 2336, 2147483666, 0, 2336, 2147483666, 0, 2352, 2147483666, 0, 2352, 2147483666, 0, 2352, 2147483666, 0, 3024, 2147483648, 0, 3040, 2147483648, 0, 3056, 2147483648, 0, 3600, 272696336, 0, 3600, 272696336, 0, 3600, 272696336, 0, 3600, 272696336, 0, 3600, 272696336, 0, 3616, 272696336, 0, 3616, 272696336, 0, 3616, 272696336, 0, 3616, 272696336, 0, 3616, 272696336, 0, 3632, 272696336, 0, 3632, 272696336, 0, 3632, 272696336, 0, 3632, 272696336, 0, 3632, 272696336, 0, 6912, 536870916, 0, 6912, 536870916, 0, 7616, 536870916, 0, 7616, 
536870916, 0, 9408, 17, 0, 9408, 17, 0, 10560, 570425378, 0, 10560, 570425378, 0, 10560, 570425378, 0, 10560, 570425378, 0, 12352, 570425344, 0, 12352, 570425344, 0, 14848, 1145324612, 0, 14848, 1145324612, 0, 14848, 1145324612, 0, 14848, 1145324612, 0, 14848, 1145324612, 0, 14848, 1145324612, 0, 14848, 1145324612, 0, 14848, 1145324612, 0, 16788, 2290649224, 0, 16788, 2290649224, 0, 16788, 2290649224, 0, 16788, 2290649224, 0, 16788, 2290649224, 0, 16788, 2290649224, 0, 16788, 2290649224, 0, 16788, 2290649224, 0, 16804, 2290649224, 0, 16804, 2290649224, 0, 16804, 2290649224, 0, 16804, 2290649224, 0, 16804, 2290649224, 0, 16804, 2290649224, 0, 16804, 2290649224, 0, 16804, 2290649224, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756410224324088061_876_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756410224324088061_876_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..15705484 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756410224324088061_876_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,159 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 20))) { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 26))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 26))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6784, 2048, 0, 8128, 1409286145, 0, 8128, 1409286145, 0, 8128, 1409286145, 0, 8128, 1409286145, 0, 6784, 2048, 0, 8128, 1409286145, 0, 8128, 1409286145, 0, 8128, 1409286145, 0, 8128, 1409286145, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756410657446504531_878_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756410657446504531_878_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b5593cf4 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756410657446504531_878_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,465 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 30))) { + result = 
(result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 1: { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 19)) || 
(WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 
3)) { + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((251 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((270 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = ((282 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (286 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (307 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((334 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (348 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((366 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 5))) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((400 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((409 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 14))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (443 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 
5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((472 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 1)) { + continue; + } + } + } + break; + } + case 3: { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 30))) { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (509 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((527 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((540 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter7 == 1)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (566 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (573 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i8 = 0; (i8 < 2); i8 = (i8 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((592 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((603 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i8 == 1)) { + continue; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 564 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3776, 8, 0, 3520, 8397840, 0, 3520, 8397840, 0, 3520, 8397840, 0, 3520, 8397840, 0, 3136, 2147483651, 0, 3136, 2147483651, 0, 3136, 2147483651, 0, 2880, 1074266240, 0, 2880, 1074266240, 0, 2880, 1074266240, 0, 4608, 73, 0, 4608, 73, 0, 4608, 73, 0, 8896, 524288, 0, 11200, 524288, 0, 11216, 524288, 0, 13312, 3674210331, 0, 13312, 3674210331, 0, 13312, 3674210331, 0, 13312, 3674210331, 0, 13312, 3674210331, 0, 13312, 
3674210331, 0, 13312, 3674210331, 0, 13312, 3674210331, 0, 13312, 3674210331, 0, 13312, 3674210331, 0, 14400, 3, 0, 14400, 3, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 17280, 536870916, 0, 17280, 536870916, 0, 17284, 536870916, 0, 17284, 536870916, 0, 17288, 536870916, 0, 17288, 536870916, 0, 17296, 536870916, 0, 17296, 536870916, 0, 17300, 536870916, 0, 17300, 536870916, 0, 17304, 536870916, 0, 17304, 536870916, 0, 19648, 273, 0, 19648, 273, 0, 19648, 273, 0, 22272, 273, 0, 22272, 273, 0, 22272, 273, 0, 23440, 572662306, 0, 23440, 572662306, 0, 23440, 572662306, 0, 23440, 572662306, 0, 23440, 572662306, 0, 23440, 572662306, 0, 23440, 572662306, 0, 23440, 572662306, 0, 23456, 572662306, 0, 23456, 572662306, 0, 23456, 572662306, 0, 23456, 572662306, 0, 23456, 572662306, 0, 23456, 572662306, 0, 23456, 572662306, 0, 23456, 572662306, 0, 25616, 32, 0, 25632, 32, 0, 28352, 16384, 0, 32576, 32768, 0, 36672, 8, 0, 37888, 2281701384, 0, 37888, 2281701384, 0, 37888, 2281701384, 0, 37904, 2281701384, 0, 37904, 2281701384, 0, 37904, 2281701384, 0, 38592, 2281701384, 0, 38592, 2281701384, 0, 38592, 2281701384, 0, 38608, 2281701384, 0, 38608, 2281701384, 0, 38608, 2281701384, 0, 3776, 8, 0, 3520, 8397840, 0, 3520, 8397840, 0, 3520, 8397840, 0, 3520, 8397840, 0, 3136, 2147483651, 0, 3136, 2147483651, 0, 3136, 2147483651, 0, 2880, 1074266240, 0, 2880, 1074266240, 0, 2880, 1074266240, 0, 4608, 73, 0, 4608, 73, 0, 4608, 73, 0, 8896, 524288, 0, 11200, 524288, 0, 11216, 524288, 0, 13312, 3674210331, 0, 13312, 3674210331, 0, 13312, 3674210331, 0, 13312, 3674210331, 0, 13312, 3674210331, 0, 13312, 3674210331, 0, 13312, 3674210331, 0, 13312, 3674210331, 0, 13312, 3674210331, 0, 13312, 3674210331, 0, 14400, 3, 0, 14400, 3, 
0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 15104, 3680501979, 0, 17280, 536870916, 0, 17280, 536870916, 0, 17284, 536870916, 0, 17284, 536870916, 0, 17288, 536870916, 0, 17288, 536870916, 0, 17296, 536870916, 0, 17296, 536870916, 0, 17300, 536870916, 0, 17300, 536870916, 0, 17304, 536870916, 0, 17304, 536870916, 0, 19648, 273, 0, 19648, 273, 0, 19648, 273, 0, 22272, 273, 0, 22272, 273, 0, 22272, 273, 0, 23440, 572662306, 0, 23440, 572662306, 0, 23440, 572662306, 0, 23440, 572662306, 0, 23440, 572662306, 0, 23440, 572662306, 0, 23440, 572662306, 0, 23440, 572662306, 0, 23456, 572662306, 0, 23456, 572662306, 0, 23456, 572662306, 0, 23456, 572662306, 0, 23456, 572662306, 0, 23456, 572662306, 0, 23456, 572662306, 0, 23456, 572662306, 0, 25616, 32, 0, 25632, 32, 0, 28352, 16384, 0, 32576, 32768, 0, 36672, 8, 0, 37888, 2281701384, 0, 37888, 2281701384, 0, 37888, 2281701384, 0, 37904, 2281701384, 0, 37904, 2281701384, 0, 37904, 2281701384, 0, 38592, 2281701384, 0, 38592, 2281701384, 0, 38592, 2281701384, 0, 38608, 2281701384, 0, 38608, 2281701384, 0, 38608, 2281701384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756410711657550807_879_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756410711657550807_879_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..74343e02 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756410711657550807_879_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,266 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 17))) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } else { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) 
== 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 390 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3904, 32776, 0, 3904, 32776, 0, 4736, 136348168, 0, 4736, 136348168, 0, 4736, 136348168, 0, 4736, 136348168, 0, 4736, 136348168, 0, 6352, 1090785345, 0, 6352, 1090785345, 0, 6352, 1090785345, 0, 6352, 1090785345, 0, 6352, 1090785345, 0, 6352, 1090785345, 0, 8768, 512, 0, 8784, 512, 0, 8800, 512, 0, 10432, 532496, 0, 10432, 532496, 0, 10432, 532496, 0, 11472, 2317918346, 0, 11472, 2317918346, 0, 11472, 2317918346, 0, 11472, 2317918346, 0, 11472, 2317918346, 0, 11472, 2317918346, 0, 11472, 2317918346, 0, 11472, 2317918346, 0, 11472, 2317918346, 0, 11472, 2317918346, 0, 11488, 2317918346, 0, 11488, 2317918346, 0, 11488, 2317918346, 0, 11488, 
2317918346, 0, 11488, 2317918346, 0, 11488, 2317918346, 0, 11488, 2317918346, 0, 11488, 2317918346, 0, 11488, 2317918346, 0, 11488, 2317918346, 0, 11504, 2317918346, 0, 11504, 2317918346, 0, 11504, 2317918346, 0, 11504, 2317918346, 0, 11504, 2317918346, 0, 11504, 2317918346, 0, 11504, 2317918346, 0, 11504, 2317918346, 0, 11504, 2317918346, 0, 11504, 2317918346, 0, 12416, 4194320, 0, 12416, 4194320, 0, 12736, 613566756, 0, 12736, 613566756, 0, 12736, 613566756, 0, 12736, 613566756, 0, 12736, 613566756, 0, 12736, 613566756, 0, 12736, 613566756, 0, 12736, 613566756, 0, 12736, 613566756, 0, 12736, 613566756, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3904, 32776, 0, 3904, 32776, 0, 4736, 136348168, 0, 4736, 136348168, 0, 4736, 136348168, 0, 4736, 136348168, 0, 4736, 136348168, 0, 6352, 1090785345, 0, 6352, 1090785345, 0, 6352, 1090785345, 0, 6352, 1090785345, 0, 6352, 1090785345, 0, 6352, 1090785345, 0, 8768, 512, 0, 8784, 512, 0, 8800, 512, 0, 10432, 532496, 0, 10432, 532496, 0, 10432, 532496, 0, 11472, 2317918346, 0, 11472, 2317918346, 0, 11472, 2317918346, 0, 11472, 2317918346, 0, 11472, 2317918346, 0, 11472, 2317918346, 0, 11472, 2317918346, 0, 11472, 2317918346, 0, 11472, 2317918346, 0, 11472, 2317918346, 0, 11488, 2317918346, 0, 11488, 2317918346, 0, 11488, 2317918346, 0, 11488, 2317918346, 0, 11488, 2317918346, 0, 11488, 2317918346, 0, 11488, 2317918346, 0, 11488, 2317918346, 0, 11488, 2317918346, 0, 11488, 2317918346, 0, 11504, 2317918346, 0, 11504, 2317918346, 0, 11504, 2317918346, 0, 11504, 2317918346, 0, 11504, 2317918346, 0, 11504, 2317918346, 0, 11504, 2317918346, 0, 11504, 2317918346, 0, 11504, 2317918346, 0, 11504, 2317918346, 0, 12416, 4194320, 0, 12416, 4194320, 0, 12736, 613566756, 0, 12736, 613566756, 0, 12736, 613566756, 0, 12736, 613566756, 0, 12736, 613566756, 0, 12736, 613566756, 0, 12736, 613566756, 0, 12736, 613566756, 0, 12736, 613566756, 0, 12736, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] 
+Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756410916438948008_882_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756410916438948008_882_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1f00c2d0 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756410916438948008_882_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,153 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 31))) { + if ((WaveGetLaneIndex() == 29)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 19)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 7876, 524288, 0, 7880, 524288, 0, 7892, 524288, 0, 7896, 524288, 0, 7908, 524288, 0, 7912, 524288, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 5696, 2113929599, 0, 7876, 524288, 0, 7880, 524288, 0, 7892, 524288, 0, 7896, 524288, 0, 7908, 524288, 0, 7912, 524288, 0, 8704, 2134900767, 0, 
8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0, 8704, 2134900767, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756410981180954633_884_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756410981180954633_884_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..66081e4a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756410981180954633_884_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,283 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 
22)) || (WaveGetLaneIndex() == 7))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 9)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 25))) { + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((253 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 30)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((263 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((270 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((285 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((296 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 264 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 8896, 4, 0, 10240, 67108864, 0, 12480, 1073741824, 0, 14992, 128, 0, 15008, 128, 0, 15024, 128, 0, 16208, 
2290089984, 0, 16208, 2290089984, 0, 16208, 2290089984, 0, 16212, 2290089984, 0, 16212, 2290089984, 0, 16212, 2290089984, 0, 16224, 2290089984, 0, 16224, 2290089984, 0, 16224, 2290089984, 0, 16228, 2290089984, 0, 16228, 2290089984, 0, 16228, 2290089984, 0, 16240, 2290089984, 0, 16240, 2290089984, 0, 16240, 2290089984, 0, 16244, 2290089984, 0, 16244, 2290089984, 0, 16244, 2290089984, 0, 18256, 32768, 0, 18260, 32768, 0, 18272, 32768, 0, 18276, 32768, 0, 18288, 32768, 0, 18292, 32768, 0, 18960, 2147483656, 0, 18960, 2147483656, 0, 18964, 2147483656, 0, 18964, 2147483656, 0, 18976, 2147483656, 0, 18976, 2147483656, 0, 18980, 2147483656, 0, 18980, 2147483656, 0, 18992, 2147483656, 0, 18992, 2147483656, 0, 18996, 2147483656, 0, 18996, 2147483656, 0, 576, 17, 0, 576, 17, 0, 8896, 4, 0, 10240, 67108864, 0, 12480, 1073741824, 0, 14992, 128, 0, 15008, 128, 0, 15024, 128, 0, 16208, 2290089984, 0, 16208, 2290089984, 0, 16208, 2290089984, 0, 16212, 2290089984, 0, 16212, 2290089984, 0, 16212, 2290089984, 0, 16224, 2290089984, 0, 16224, 2290089984, 0, 16224, 2290089984, 0, 16228, 2290089984, 0, 16228, 2290089984, 0, 16228, 2290089984, 0, 16240, 2290089984, 0, 16240, 2290089984, 0, 16240, 2290089984, 0, 16244, 2290089984, 0, 16244, 2290089984, 0, 16244, 2290089984, 0, 18256, 32768, 0, 18260, 32768, 0, 18272, 32768, 0, 18276, 32768, 0, 18288, 32768, 0, 18292, 32768, 0, 18960, 2147483656, 0, 18960, 2147483656, 0, 18964, 2147483656, 0, 18964, 2147483656, 0, 18976, 2147483656, 0, 18976, 2147483656, 0, 18980, 2147483656, 0, 18980, 2147483656, 0, 18992, 2147483656, 0, 18992, 2147483656, 0, 18996, 2147483656, 0, 18996, 2147483656, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756410988429922338_885_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756410988429922338_885_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c438d0d8 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756410988429922338_885_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,99 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 27)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 3758096385, 0, 1216, 3758096385, 0, 1216, 3758096385, 0, 1216, 3758096385, 0, 3072, 3758096384, 0, 3072, 3758096384, 0, 3072, 3758096384, 0, 3088, 3758096384, 0, 3088, 3758096384, 0, 3088, 3758096384, 0, 3104, 3758096384, 0, 3104, 3758096384, 0, 3104, 3758096384, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 1216, 3758096385, 0, 1216, 3758096385, 0, 1216, 3758096385, 0, 1216, 3758096385, 0, 3072, 3758096384, 0, 3072, 3758096384, 0, 3072, 3758096384, 0, 3088, 3758096384, 0, 3088, 3758096384, 0, 3088, 3758096384, 0, 3104, 3758096384, 0, 3104, 3758096384, 0, 3104, 3758096384, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0, 3776, 4292870145, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756411084404423908_889_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756411084404423908_889_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f4af28d4 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756411084404423908_889_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,87 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 127, 0, 1088, 127, 0, 1088, 127, 0, 1088, 127, 0, 1088, 127, 0, 1088, 127, 0, 1088, 127, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1088, 127, 0, 1088, 127, 0, 1088, 127, 0, 1088, 127, 0, 1088, 127, 0, 1088, 127, 0, 1088, 127, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 832, 1431655680, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0, 1984, 2863311530, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756411354298374590_891_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756411354298374590_891_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3595cb78 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756411354298374590_891_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,150 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 22)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch 
for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2368, 1024, 0, 2372, 1024, 0, 2376, 1024, 0, 2384, 1024, 0, 2388, 1024, 0, 2392, 1024, 0, 4416, 16384, 0, 4420, 16384, 0, 4424, 16384, 0, 4432, 16384, 0, 4436, 16384, 0, 4440, 16384, 0, 5248, 8, 0, 6464, 545392672, 0, 6464, 545392672, 0, 6464, 545392672, 0, 6464, 545392672, 0, 6464, 545392672, 0, 2368, 1024, 0, 2372, 1024, 0, 2376, 1024, 0, 2384, 1024, 0, 2388, 1024, 0, 2392, 1024, 0, 4416, 16384, 0, 4420, 16384, 0, 4424, 16384, 0, 4432, 16384, 0, 4436, 16384, 0, 4440, 16384, 0, 5248, 8, 0, 6464, 545392672, 0, 6464, 545392672, 0, 6464, 545392672, 0, 6464, 545392672, 0, 6464, 545392672, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756411369929413166_892_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756411369929413166_892_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f033ddac --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756411369929413166_892_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,182 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 26))) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((133 << 6) | (i2 << 4)) | (counter3 << 
2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2368, 16777216, 0, 2384, 16777216, 0, 5376, 17, 0, 5376, 17, 0, 6272, 1145324612, 0, 6272, 1145324612, 0, 6272, 1145324612, 0, 6272, 1145324612, 0, 6272, 1145324612, 0, 6272, 1145324612, 0, 6272, 1145324612, 0, 6272, 1145324612, 0, 7360, 136, 0, 7360, 136, 0, 7376, 136, 0, 7376, 136, 0, 8960, 136, 0, 8960, 136, 0, 8976, 136, 0, 8976, 136, 0, 2368, 16777216, 0, 2384, 16777216, 0, 5376, 17, 0, 5376, 17, 0, 6272, 1145324612, 0, 6272, 1145324612, 0, 6272, 1145324612, 0, 6272, 1145324612, 0, 6272, 1145324612, 0, 6272, 1145324612, 0, 6272, 1145324612, 0, 6272, 1145324612, 0, 7360, 136, 0, 7360, 136, 0, 7376, 136, 0, 7376, 136, 0, 8960, 136, 0, 8960, 136, 0, 8976, 136, 0, 8976, 136, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - 
Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756411378116733463_893_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756411378116733463_893_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dbd146aa --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756411378116733463_893_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,684 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 21))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 18)) || 
(WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = 
(counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((192 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((202 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((211 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((216 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((220 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((227 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((279 << 6) | (i3 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((311 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 9)) { + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((321 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((328 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 1)) { + continue; + } + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 28)) || 
(WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (350 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 27))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (391 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (401 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (410 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (415 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if 
((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (422 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (426 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (437 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((462 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((481 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (490 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (495 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (500 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((522 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((532 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((541 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if 
(true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((546 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((553 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((562 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((569 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((579 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((588 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (592 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 540 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 6336, 2113536, 0, 6336, 2113536, 0, 6976, 17, 0, 6976, 17, 0, 7552, 17895697, 0, 7552, 17895697, 0, 7552, 17895697, 0, 7552, 17895697, 0, 7552, 17895697, 0, 7552, 17895697, 0, 7552, 17895697, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 8768, 559240, 0, 8768, 559240, 0, 8768, 559240, 0, 8768, 559240, 0, 8768, 559240, 0, 9408, 17, 0, 9408, 17, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 12944, 8, 0, 12948, 8, 0, 12960, 8, 0, 12964, 8, 0, 13840, 8390656, 0, 13840, 8390656, 0, 13844, 8390656, 0, 13844, 8390656, 0, 13856, 8390656, 0, 13856, 8390656, 0, 13860, 8390656, 0, 13860, 8390656, 0, 22400, 268435456, 0, 27968, 2097664, 0, 27968, 2097664, 0, 
31680, 537002016, 0, 31680, 537002016, 0, 31680, 537002016, 0, 32000, 1145324612, 0, 32000, 1145324612, 0, 32000, 1145324612, 0, 32000, 1145324612, 0, 32000, 1145324612, 0, 32000, 1145324612, 0, 32000, 1145324612, 0, 32000, 1145324612, 0, 36416, 2184, 0, 36416, 2184, 0, 36416, 2184, 0, 36432, 2184, 0, 36432, 2184, 0, 36432, 2184, 0, 36448, 2184, 0, 36448, 2184, 0, 36448, 2184, 0, 576, 17, 0, 576, 17, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 6336, 2113536, 0, 6336, 2113536, 0, 6976, 17, 0, 6976, 17, 0, 7552, 17895697, 0, 7552, 17895697, 0, 7552, 17895697, 0, 7552, 17895697, 0, 7552, 17895697, 0, 7552, 17895697, 0, 7552, 17895697, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 7872, 1729574743, 0, 8768, 559240, 0, 8768, 559240, 0, 8768, 559240, 0, 8768, 559240, 0, 8768, 559240, 0, 9408, 17, 0, 9408, 17, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 10304, 1145324612, 0, 12944, 8, 0, 12948, 8, 0, 12960, 8, 0, 12964, 8, 0, 13840, 8390656, 0, 13840, 8390656, 0, 13844, 8390656, 0, 13844, 8390656, 0, 13856, 8390656, 0, 13856, 8390656, 0, 13860, 8390656, 0, 13860, 8390656, 0, 22400, 268435456, 0, 27968, 2097664, 0, 27968, 2097664, 0, 31680, 537002016, 0, 31680, 537002016, 0, 31680, 537002016, 0, 32000, 1145324612, 0, 32000, 1145324612, 0, 32000, 1145324612, 0, 32000, 1145324612, 0, 32000, 1145324612, 0, 32000, 1145324612, 0, 32000, 1145324612, 0, 32000, 1145324612, 0, 36416, 2184, 0, 36416, 2184, 0, 36416, 2184, 0, 
36432, 2184, 0, 36432, 2184, 0, 36432, 2184, 0, 36448, 2184, 0, 36448, 2184, 0, 36448, 2184, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756411531906439701_894_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756411531906439701_894_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2b7cfe8e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756411531906439701_894_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,133 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of 3-word records: op tag, WaveActiveBallot(1).x, WaveActiveBallot(1).y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the append cursor into _participant_bit; InterlockedAdd advances it by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot =
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 31)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 8, 0, 2624, 545392672, 0, 2624, 545392672, 0, 2624, 545392672, 0, 2624, 545392672, 0, 2624, 545392672, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 8, 0, 2624, 545392672, 0, 2624, 545392672, 0, 2624, 545392672, 0,
2624, 545392672, 0, 2624, 545392672, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756411571313410570_896_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756411571313410570_896_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1a79bd18 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756411571313410570_896_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,227 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of 3-word records: op tag (id shifted left 6, OR'ed with loop counter shifted left 4 inside loops), WaveActiveBallot(1).x, WaveActiveBallot(1).y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the append cursor into _participant_bit; InterlockedAdd advances it by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; +
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 26))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 27))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; +
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 24)) { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; +
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 27))) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 558 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 73, 0, 1216, 73, 0, 1216, 73, 0, 2432, 2181570690, 0, 2432, 2181570690, 0, 2432, 2181570690, 0, 2432, 2181570690, 0, 2432, 2181570690, 0, 2432, 2181570690, 0, 2448, 2181570690, 0, 2448, 2181570690, 0, 2448, 2181570690, 0, 2448, 2181570690, 0, 2448, 2181570690, 0, 2448, 2181570690, 0, 2464, 2181570690, 0, 2464, 2181570690, 0, 2464, 2181570690, 0, 2464, 2181570690, 0, 2464, 2181570690, 0, 2464, 2181570690, 0, 3584, 2147483650, 0, 3584, 2147483650, 0, 3600, 2147483650, 0, 3600, 2147483650, 0, 3616, 2147483650, 0, 3616, 2147483650, 0, 4416, 2415919106, 0, 4416, 2415919106, 0, 4416, 2415919106, 0, 4432, 2415919106, 0, 4432, 2415919106, 0, 4432, 2415919106, 0, 4448, 2415919106, 0, 4448, 2415919106, 0, 4448, 2415919106, 0, 4992, 272696336, 0,
4992, 272696336, 0, 4992, 272696336, 0, 4992, 272696336, 0, 4992, 272696336, 0, 5008, 272696336, 0, 5008, 272696336, 0, 5008, 272696336, 0, 5008, 272696336, 0, 5008, 272696336, 0, 5024, 272696336, 0, 5024, 272696336, 0, 5024, 272696336, 0, 5024, 272696336, 0, 5024, 272696336, 0, 6144, 536870916, 0, 6144, 536870916, 0, 7936, 536870948, 0, 7936, 536870948, 0, 7936, 536870948, 0, 9600, 68173824, 0, 9600, 68173824, 0, 9600, 68173824, 0, 11088, 2818572298, 0, 11088, 2818572298, 0, 11088, 2818572298, 0, 11088, 2818572298, 0, 11088, 2818572298, 0, 11104, 2818572298, 0, 11104, 2818572298, 0, 11104, 2818572298, 0, 11104, 2818572298, 0, 11104, 2818572298, 0, 11120, 2818572298, 0, 11120, 2818572298, 0, 11120, 2818572298, 0, 11120, 2818572298, 0, 11120, 2818572298, 0, 11664, 1342177301, 0, 11664, 1342177301, 0, 11664, 1342177301, 0, 11664, 1342177301, 0, 11664, 1342177301, 0, 11680, 1342177301, 0, 11680, 1342177301, 0, 11680, 1342177301, 0, 11680, 1342177301, 0, 11680, 1342177301, 0, 11696, 1342177301, 0, 11696, 1342177301, 0, 11696, 1342177301, 0, 11696, 1342177301, 0, 11696, 1342177301, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 73, 0, 1216, 73, 0, 1216, 73, 0, 2432, 2181570690, 0, 2432, 2181570690, 0, 2432, 2181570690, 0, 2432, 2181570690, 0, 2432, 2181570690, 0, 2432, 2181570690, 0, 2448, 2181570690, 0, 2448, 2181570690, 0, 2448, 2181570690, 0, 2448, 2181570690, 0, 2448, 2181570690, 0, 2448, 2181570690, 0, 2464, 2181570690, 0, 2464, 2181570690, 0, 2464, 2181570690, 0, 2464, 2181570690, 0, 2464, 2181570690, 0, 2464, 2181570690, 0, 3584, 2147483650, 0, 3584, 2147483650, 0, 3600, 2147483650, 0, 3600, 2147483650, 0, 3616, 2147483650, 0, 3616, 2147483650, 0, 4416, 2415919106, 0, 4416, 2415919106, 0, 4416, 2415919106, 0, 4432, 2415919106, 0, 4432, 2415919106, 0, 4432, 2415919106, 0, 4448, 2415919106, 0, 4448, 2415919106, 0, 4448, 2415919106, 0, 4992, 272696336, 0, 4992, 272696336, 0, 4992, 272696336, 0, 4992, 272696336, 0, 4992, 272696336, 0, 5008, 272696336, 0, 
5008, 272696336, 0, 5008, 272696336, 0, 5008, 272696336, 0, 5008, 272696336, 0, 5024, 272696336, 0, 5024, 272696336, 0, 5024, 272696336, 0, 5024, 272696336, 0, 5024, 272696336, 0, 6144, 536870916, 0, 6144, 536870916, 0, 7936, 536870948, 0, 7936, 536870948, 0, 7936, 536870948, 0, 9600, 68173824, 0, 9600, 68173824, 0, 9600, 68173824, 0, 11088, 2818572298, 0, 11088, 2818572298, 0, 11088, 2818572298, 0, 11088, 2818572298, 0, 11088, 2818572298, 0, 11104, 2818572298, 0, 11104, 2818572298, 0, 11104, 2818572298, 0, 11104, 2818572298, 0, 11104, 2818572298, 0, 11120, 2818572298, 0, 11120, 2818572298, 0, 11120, 2818572298, 0, 11120, 2818572298, 0, 11120, 2818572298, 0, 11664, 1342177301, 0, 11664, 1342177301, 0, 11664, 1342177301, 0, 11664, 1342177301, 0, 11664, 1342177301, 0, 11680, 1342177301, 0, 11680, 1342177301, 0, 11680, 1342177301, 0, 11680, 1342177301, 0, 11680, 1342177301, 0, 11696, 1342177301, 0, 11696, 1342177301, 0, 11696, 1342177301, 0, 11696, 1342177301, 0, 11696, 1342177301, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756411580144349471_897_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756411580144349471_897_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..28b97e8c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756411580144349471_897_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,116 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of 3-word records: op tag, WaveActiveBallot(1).x, WaveActiveBallot(1).y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the append cursor into _participant_bit; InterlockedAdd advances it by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 18))) { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex()
== 18))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 4288, 2147483654, 0, 4288, 2147483654, 0, 4288, 2147483654, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 4288, 2147483654, 0, 4288, 2147483654, 0, 4288, 2147483654, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756411621181772148_899_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756411621181772148_899_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..69fc7252 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756411621181772148_899_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,151 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of 3-word records: op tag (id shifted left 6, OR'ed with loop counter shifted left 4 inside loops), WaveActiveBallot(1).x, WaveActiveBallot(1).y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the append cursor into _participant_bit; InterlockedAdd advances it by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 21))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 7))) { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 22))) { + result = (result +
WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 20))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads
+Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2688, 2147483651, 0, 2688, 2147483651, 0, 2688, 2147483651, 0, 2704, 2147483651, 0, 2704, 2147483651, 0, 2704, 2147483651, 0, 5440, 2147483775, 0, 5440, 2147483775, 0, 5440, 2147483775, 0, 5440, 2147483775, 0, 5440, 2147483775, 0, 5440, 2147483775, 0, 5440, 2147483775, 0, 5440, 2147483775, 0, 6016, 1398016, 0, 6016, 1398016, 0, 6016, 1398016, 0, 6016, 1398016, 0, 6016, 1398016, 0, 6016, 1398016, 0, 6016, 1398016, 0, 2688, 2147483651, 0, 2688, 2147483651, 0, 2688, 2147483651, 0, 2704, 2147483651, 0, 2704, 2147483651, 0, 2704, 2147483651, 0, 5440, 2147483775, 0, 5440, 2147483775, 0, 5440, 2147483775, 0, 5440, 2147483775, 0, 5440, 2147483775, 0, 5440, 2147483775, 0, 5440, 2147483775, 0, 5440, 2147483775, 0, 6016, 1398016, 0, 6016, 1398016, 0, 6016, 1398016, 0, 6016, 1398016, 0, 6016, 1398016, 0, 6016, 1398016, 0, 6016, 1398016, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756411623665093087_900_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756411623665093087_900_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c7ec0dd1 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756411623665093087_900_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,94 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + 
- Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 16, 0, 1280, 16, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756411623940956438_901_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756411623940956438_901_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5c83410e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756411623940956438_901_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,228 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } 
+ case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 462 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 2112, 8, 0, 3008, 8390656, 0, 3008, 8390656, 0, 3840, 1, 0, 7232, 1048832, 0, 7232, 1048832, 0, 7808, 286331153, 0, 7808, 286331153, 0, 7808, 286331153, 0, 7808, 286331153, 0, 7808, 286331153, 0, 7808, 286331153, 0, 7808, 286331153, 0, 7808, 286331153, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8576, 559240, 0, 8576, 
559240, 0, 8576, 559240, 0, 8576, 559240, 0, 8576, 559240, 0, 576, 17, 0, 576, 17, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1152, 286331153, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 1472, 2004318071, 0, 2112, 8, 0, 3008, 8390656, 0, 3008, 8390656, 0, 3840, 1, 0, 7232, 1048832, 0, 7232, 1048832, 0, 7808, 286331153, 0, 7808, 286331153, 0, 7808, 286331153, 0, 7808, 286331153, 0, 7808, 286331153, 0, 7808, 286331153, 0, 7808, 286331153, 0, 7808, 286331153, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8128, 2004318071, 0, 8576, 559240, 0, 8576, 559240, 0, 8576, 559240, 0, 8576, 559240, 0, 8576, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + 
Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756411672938675514_903_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756411672938675514_903_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b46d900b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756411672938675514_903_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 
threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756411673143614286_904_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756411673143614286_904_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ca7fe52a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756411673143614286_904_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,88 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 23))) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756411727665136934_906_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756411727665136934_906_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0002a1a6 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756411727665136934_906_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,138 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 25))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 19)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 20)) || 
(WaveGetLaneIndex() == 12))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1728, 2415919104, 0, 1728, 2415919104, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 
0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1728, 2415919104, 0, 1728, 2415919104, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0, 6336, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756412106820162731_910_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756412106820162731_910_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fab0aa08 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756412106820162731_910_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,266 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + 
result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((169 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 
64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 300 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 5632, 17, 0, 5632, 17, 0, 9680, 1145324612, 0, 9680, 1145324612, 0, 9680, 1145324612, 0, 9680, 1145324612, 0, 9680, 1145324612, 0, 9680, 1145324612, 0, 9680, 1145324612, 0, 9680, 1145324612, 0, 9696, 1145324612, 0, 9696, 1145324612, 0, 9696, 1145324612, 0, 9696, 1145324612, 0, 9696, 1145324612, 0, 9696, 1145324612, 0, 9696, 1145324612, 0, 9696, 1145324612, 0, 10836, 16384, 0, 10840, 16384, 0, 10844, 16384, 0, 10852, 16384, 0, 10856, 16384, 0, 10860, 16384, 0, 576, 17, 0, 576, 17, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 1472, 1145324612, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 4992, 1431655765, 0, 5632, 17, 0, 5632, 17, 0, 9680, 1145324612, 0, 9680, 1145324612, 0, 9680, 1145324612, 0, 9680, 1145324612, 0, 9680, 1145324612, 0, 9680, 1145324612, 0, 9680, 1145324612, 0, 9680, 1145324612, 0, 9696, 1145324612, 0, 9696, 1145324612, 0, 9696, 1145324612, 0, 9696, 1145324612, 0, 9696, 1145324612, 0, 9696, 
1145324612, 0, 9696, 1145324612, 0, 9696, 1145324612, 0, 10836, 16384, 0, 10840, 16384, 0, 10844, 16384, 0, 10852, 16384, 0, 10856, 16384, 0, 10860, 16384, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756412108712643811_911_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756412108712643811_911_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..181de27e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756412108712643811_911_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,234 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if ((WaveGetLaneIndex() == 26)) { + if ((WaveGetLaneIndex() >= 21)) { + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 20)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 24))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 30))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((267 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2128, 67108864, 0, 3840, 4286578688, 0, 3840, 4286578688, 0, 3840, 4286578688, 0, 3840, 4286578688, 0, 3840, 4286578688, 0, 3840, 4286578688, 0, 3840, 4286578688, 0, 3840, 4286578688, 0, 3840, 4286578688, 0, 4928, 1431306240, 0, 4928, 1431306240, 0, 4928, 1431306240, 0, 4928, 1431306240, 0, 4928, 1431306240, 0, 4928, 1431306240, 0, 4944, 1431306240, 0, 4944, 1431306240, 0, 4944, 1431306240, 0, 4944, 1431306240, 0, 4944, 1431306240, 0, 4944, 1431306240, 0, 6656, 8200, 0, 6656, 8200, 0, 8832, 131072, 0, 12096, 16, 0, 13440, 32772, 0, 13440, 32772, 0, 14080, 85, 0, 14080, 85, 0, 14080, 85, 0, 14080, 85, 0, 17104, 8200, 0, 17104, 8200, 0, 17120, 8200, 0, 17120, 8200, 0, 2128, 67108864, 0, 3840, 4286578688, 0, 3840, 4286578688, 0, 3840, 4286578688, 0, 3840, 4286578688, 0, 3840, 4286578688, 0, 3840, 4286578688, 0, 3840, 4286578688, 0, 3840, 4286578688, 0, 3840, 4286578688, 0, 4928, 1431306240, 0, 4928, 1431306240, 0, 4928, 1431306240, 0, 4928, 1431306240, 0, 4928, 1431306240, 0, 4928, 1431306240, 0, 4944, 1431306240, 0, 4944, 1431306240, 0, 4944, 1431306240, 0, 4944, 1431306240, 0, 4944, 1431306240, 0, 4944, 1431306240, 0, 6656, 8200, 0, 6656, 8200, 0, 8832, 131072, 0, 12096, 16, 0, 13440, 32772, 0, 13440, 32772, 0, 14080, 85, 0, 14080, 85, 0, 14080, 85, 0, 14080, 85, 0, 17104, 8200, 0, 17104, 8200, 0, 17120, 8200, 0, 17120, 8200, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756412124144493288_912_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756412124144493288_912_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c841aa01 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756412124144493288_912_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,216 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 23))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } else { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 10)) { + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 4800, 2, 0, 4816, 2, 0, 6336, 16, 0, 10368, 613566756, 0, 10368, 613566756, 0, 10368, 613566756, 0, 10368, 613566756, 0, 10368, 613566756, 0, 10368, 613566756, 0, 10368, 613566756, 0, 10368, 613566756, 0, 10368, 613566756, 0, 10368, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 4800, 2, 0, 4816, 2, 0, 6336, 16, 0, 10368, 613566756, 0, 10368, 613566756, 0, 10368, 613566756, 0, 10368, 613566756, 0, 10368, 613566756, 0, 10368, 613566756, 0, 10368, 613566756, 0, 10368, 613566756, 0, 10368, 613566756, 0, 10368, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756412172721649720_914_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756412172721649720_914_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2ff6fc9f --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756412172721649720_914_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,198 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 29))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 2: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 18))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 27))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveSum(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 18))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((WaveGetLaneIndex() >= 17)) { + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + 
Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [12496, 128, 0, 12512, 128, 0, 12528, 128, 0, 13696, 4160749568, 0, 13696, 4160749568, 0, 13696, 4160749568, 0, 13696, 4160749568, 0, 13696, 4160749568, 0, 12496, 128, 0, 12512, 128, 0, 12528, 128, 0, 13696, 4160749568, 0, 13696, 4160749568, 0, 13696, 4160749568, 0, 13696, 4160749568, 0, 13696, 4160749568, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756412400620886473_916_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756412400620886473_916_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cccbe098 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756412400620886473_916_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,127 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 24))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } 
+} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756412400759604469_917_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756412400759604469_917_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c57cbcaf --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756412400759604469_917_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,69 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 29))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756412400910746452_918_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756412400910746452_918_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..895119f2 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756412400910746452_918_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,153 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 18))) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 7056, 2147483650, 0, 7056, 2147483650, 0, 7072, 2147483650, 0, 7072, 2147483650, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 7056, 2147483650, 0, 7056, 2147483650, 0, 7072, 2147483650, 0, 7072, 2147483650, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756412893693987939_922_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756412893693987939_922_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d9db9f4d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756412893693987939_922_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,132 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 25)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5184, 8388608, 0, 4800, 1040187392, 0, 4800, 1040187392, 0, 4800, 1040187392, 0, 4800, 
1040187392, 0, 4800, 1040187392, 0, 4032, 2623488, 0, 4032, 2623488, 0, 4032, 2623488, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5440, 3221225983, 0, 5184, 8388608, 0, 4800, 1040187392, 0, 4800, 1040187392, 0, 4800, 1040187392, 0, 4800, 1040187392, 0, 4800, 1040187392, 0, 4032, 2623488, 0, 4032, 2623488, 0, 4032, 2623488, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756412946169232271_924_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756412946169232271_924_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..892c0f29 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756412946169232271_924_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,370 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 23))) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 28))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 16)) { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i2 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 19))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 25))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((287 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((301 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 22)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (324 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (333 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (347 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } 
+ } + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (366 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (375 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (380 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (384 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 134221824, 0, 2112, 134221824, 0, 7616, 4278190080, 0, 7616, 4278190080, 0, 7616, 4278190080, 0, 7616, 4278190080, 0, 7616, 4278190080, 0, 7616, 4278190080, 0, 7616, 4278190080, 0, 7616, 4278190080, 0, 21312, 1090785345, 0, 21312, 1090785345, 0, 21312, 1090785345, 0, 21312, 1090785345, 0, 21312, 1090785345, 0, 21312, 1090785345, 0, 23424, 1, 0, 24000, 272696336, 
0, 24000, 272696336, 0, 24000, 272696336, 0, 24000, 272696336, 0, 24000, 272696336, 0, 24320, 613566756, 0, 24320, 613566756, 0, 24320, 613566756, 0, 24320, 613566756, 0, 24320, 613566756, 0, 24320, 613566756, 0, 24320, 613566756, 0, 24320, 613566756, 0, 24320, 613566756, 0, 24320, 613566756, 0, 2112, 134221824, 0, 2112, 134221824, 0, 7616, 4278190080, 0, 7616, 4278190080, 0, 7616, 4278190080, 0, 7616, 4278190080, 0, 7616, 4278190080, 0, 7616, 4278190080, 0, 7616, 4278190080, 0, 7616, 4278190080, 0, 21312, 1090785345, 0, 21312, 1090785345, 0, 21312, 1090785345, 0, 21312, 1090785345, 0, 21312, 1090785345, 0, 21312, 1090785345, 0, 23424, 1, 0, 24000, 272696336, 0, 24000, 272696336, 0, 24000, 272696336, 0, 24000, 272696336, 0, 24000, 272696336, 0, 24320, 613566756, 0, 24320, 613566756, 0, 24320, 613566756, 0, 24320, 613566756, 0, 24320, 613566756, 0, 24320, 613566756, 0, 24320, 613566756, 0, 24320, 613566756, 0, 24320, 613566756, 0, 24320, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756412977809281180_926_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756412977809281180_926_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..84e1328b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756412977809281180_926_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,233 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 16))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 28)) { + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 20))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 1, 0, 2256, 1, 0, 2272, 1, 0, 6592, 17, 0, 6592, 17, 0, 7808, 64, 0, 10816, 559240, 0, 10816, 559240, 0, 10816, 559240, 0, 10816, 559240, 0, 10816, 559240, 0, 1088, 1, 0, 2256, 1, 0, 2272, 1, 0, 6592, 17, 0, 6592, 17, 0, 7808, 64, 0, 10816, 559240, 0, 10816, 559240, 0, 10816, 559240, 0, 10816, 559240, 0, 10816, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756412981420089710_927_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756412981420089710_927_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f0227fce --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756412981420089710_927_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,186 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 30))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if 
((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + 
WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1344, 268501008, 0, 7424, 1145324612, 0, 7424, 1145324612, 0, 7424, 1145324612, 0, 7424, 1145324612, 0, 7424, 1145324612, 0, 7424, 1145324612, 0, 7424, 1145324612, 0, 7424, 1145324612, 0, 7872, 559240, 0, 7872, 559240, 0, 7872, 559240, 0, 7872, 559240, 0, 7872, 559240, 0, 768, 1, 0, 1344, 268501008, 0, 1344, 268501008, 0, 1344, 268501008, 0, 7424, 1145324612, 0, 7424, 1145324612, 0, 7424, 1145324612, 0, 7424, 1145324612, 0, 7424, 1145324612, 0, 7424, 1145324612, 0, 7424, 1145324612, 0, 7424, 1145324612, 0, 7872, 559240, 0, 7872, 559240, 0, 7872, 559240, 0, 7872, 559240, 0, 7872, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 
+... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756412984140888388_928_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756412984140888388_928_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f1d4026b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756412984140888388_928_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 7))) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + if 
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0, 3776, 2863311530, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756412985045785509_929_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756412985045785509_929_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..948a40a9 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756412985045785509_929_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,118 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 19))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((66 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 3664, 1048576, 0, 3680, 1048576, 0, 3696, 1048576, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 832, 1431655765, 0, 3664, 1048576, 0, 3680, 1048576, 0, 3696, 1048576, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756413164892911272_932_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756413164892911272_932_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c866a649 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756413164892911272_932_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,114 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: 
{ + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 266248, 0, 1856, 266248, 0, 1856, 266248, 0, 1472, 268435584, 0, 1472, 268435584, 0, 2496, 85, 0, 2496, 85, 0, 2496, 85, 0, 2496, 85, 0, 3712, 2684354570, 0, 3712, 2684354570, 0, 3712, 2684354570, 0, 3712, 2684354570, 0, 3728, 2684354570, 0, 3728, 2684354570, 0, 3728, 2684354570, 0, 3728, 2684354570, 0, 1856, 266248, 0, 1856, 266248, 0, 1856, 266248, 0, 1472, 268435584, 0, 1472, 268435584, 0, 2496, 85, 0, 2496, 85, 0, 2496, 85, 0, 2496, 85, 0, 3712, 2684354570, 0, 3712, 2684354570, 0, 3712, 2684354570, 0, 3712, 2684354570, 0, 3728, 2684354570, 0, 3728, 2684354570, 0, 3728, 2684354570, 0, 3728, 2684354570, 0] + - Name: _wave_op_index + Format: UInt32 + 
Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756413166101235889_933_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756413166101235889_933_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..17eb48f6 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756413166101235889_933_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,104 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 24))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2064, 16777216, 0, 2960, 16777216, 0, 2064, 16777216, 0, 2960, 16777216, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756413166743206962_934_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756413166743206962_934_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b46d900b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756413166743206962_934_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756413167927333195_935_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756413167927333195_935_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3afb1678 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756413167927333195_935_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,344 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; 
+ } + case 1: { + if ((WaveGetLaneIndex() >= 21)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 15)) { + if ((WaveGetLaneIndex() >= 29)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result 
+ WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((282 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((305 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 8, 0, 3648, 85, 0, 3648, 85, 0, 3648, 85, 0, 3648, 85, 0, 4288, 8, 0, 5504, 2147483648, 0, 7296, 85, 0, 7296, 85, 0, 7296, 85, 0, 7296, 85, 0, 7936, 8, 0, 14528, 2080, 0, 14528, 2080, 0, 14544, 2080, 0, 14544, 2080, 0, 14560, 2080, 0, 14560, 2080, 0, 19520, 32, 0, 19536, 32, 0, 19552, 32, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 8, 0, 3648, 85, 0, 3648, 85, 0, 3648, 85, 0, 3648, 85, 0, 4288, 8, 0, 5504, 2147483648, 0, 7296, 85, 0, 7296, 85, 0, 7296, 85, 0, 7296, 85, 0, 7936, 8, 0, 14528, 2080, 0, 14528, 2080, 0, 14544, 2080, 0, 14544, 2080, 0, 14560, 2080, 0, 14560, 2080, 0, 19520, 32, 0, 19536, 32, 0, 19552, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756413176875276923_936_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756413176875276923_936_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6ea98654 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756413176875276923_936_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,340 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 14)) { + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 25))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 
6) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 27))) { + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (282 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 384 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1936, 9, 0, 1936, 9, 0, 1952, 9, 0, 1952, 9, 0, 3728, 9, 0, 3728, 9, 0, 3744, 9, 0, 3744, 9, 0, 5696, 
1207959552, 0, 5696, 1207959552, 0, 6784, 1073741824, 0, 6800, 1073741824, 0, 7360, 134217728, 0, 7376, 134217728, 0, 8256, 1207959552, 0, 8256, 1207959552, 0, 8832, 272696336, 0, 8832, 272696336, 0, 8832, 272696336, 0, 8832, 272696336, 0, 8832, 272696336, 0, 9152, 613566756, 0, 9152, 613566756, 0, 9152, 613566756, 0, 9152, 613566756, 0, 9152, 613566756, 0, 9152, 613566756, 0, 9152, 613566756, 0, 9152, 613566756, 0, 9152, 613566756, 0, 9152, 613566756, 0, 16512, 286331153, 0, 16512, 286331153, 0, 16512, 286331153, 0, 16512, 286331153, 0, 16512, 286331153, 0, 16512, 286331153, 0, 16512, 286331153, 0, 16512, 286331153, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 17472, 8, 0, 1936, 9, 0, 1936, 9, 0, 1952, 9, 0, 1952, 9, 0, 3728, 9, 0, 3728, 9, 0, 3744, 9, 0, 3744, 9, 0, 5696, 1207959552, 0, 5696, 1207959552, 0, 6784, 1073741824, 0, 6800, 1073741824, 0, 7360, 134217728, 0, 7376, 134217728, 0, 8256, 1207959552, 0, 8256, 1207959552, 0, 8832, 272696336, 0, 8832, 272696336, 0, 8832, 272696336, 0, 8832, 272696336, 0, 8832, 272696336, 0, 9152, 613566756, 0, 9152, 613566756, 0, 9152, 613566756, 0, 9152, 613566756, 0, 9152, 613566756, 0, 9152, 613566756, 0, 9152, 613566756, 0, 9152, 613566756, 0, 9152, 613566756, 0, 9152, 613566756, 0, 16512, 286331153, 0, 16512, 286331153, 0, 16512, 286331153, 0, 16512, 286331153, 0, 16512, 286331153, 0, 16512, 286331153, 0, 16512, 286331153, 0, 16512, 286331153, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 
2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 16832, 2004318071, 0, 17472, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756413230266892409_937_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756413230266892409_937_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e7c01504 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756413230266892409_937_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,386 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((26 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 29))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((73 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 2)) { + break; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 21))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 22))) { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 22))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((295 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((310 << 6) | (i3 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (330 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (340 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 30))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (362 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (376 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (389 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (398 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (405 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 600 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1680, 136348168, 0, 1680, 136348168, 0, 1680, 136348168, 0, 1680, 136348168, 0, 1680, 136348168, 0, 1684, 136348168, 0, 1684, 136348168, 0, 1684, 136348168, 0, 1684, 136348168, 0, 1684, 136348168, 0, 1688, 136348168, 0, 1688, 136348168, 0, 1688, 136348168, 0, 1688, 136348168, 0, 1688, 136348168, 0, 1696, 136348168, 0, 1696, 136348168, 0, 1696, 136348168, 0, 1696, 136348168, 0, 1696, 136348168, 0, 1700, 136348168, 0, 1700, 136348168, 0, 1700, 136348168, 0, 1700, 136348168, 0, 1700, 136348168, 0, 1704, 136348168, 0, 1704, 136348168, 0, 1704, 136348168, 0, 1704, 136348168, 0, 1704, 136348168, 0, 1712, 136348168, 0, 1712, 136348168, 0, 1712, 136348168, 0, 1712, 136348168, 0, 1712, 136348168, 0, 1716, 136348168, 0, 1716, 136348168, 0, 1716, 136348168, 0, 1716, 136348168, 0, 1716, 136348168, 0, 1720, 136348168, 0, 1720, 136348168, 0, 1720, 136348168, 0, 1720, 136348168, 0, 1720, 136348168, 0, 5760, 1363481681, 0, 
5760, 1363481681, 0, 5760, 1363481681, 0, 5760, 1363481681, 0, 5760, 1363481681, 0, 5760, 1363481681, 0, 5760, 1363481681, 0, 5760, 1363481681, 0, 5760, 1363481681, 0, 5760, 1363481681, 0, 5760, 1363481681, 0, 6400, 17, 0, 6400, 17, 0, 7296, 1078199360, 0, 7296, 1078199360, 0, 7296, 1078199360, 0, 7296, 1078199360, 0, 7296, 1078199360, 0, 13712, 32768, 0, 13728, 32768, 0, 13744, 32768, 0, 14416, 2315255810, 0, 14416, 2315255810, 0, 14416, 2315255810, 0, 14416, 2315255810, 0, 14432, 2315255810, 0, 14432, 2315255810, 0, 14432, 2315255810, 0, 14432, 2315255810, 0, 14448, 2315255810, 0, 14448, 2315255810, 0, 14448, 2315255810, 0, 14448, 2315255810, 0, 14720, 613566756, 0, 14720, 613566756, 0, 14720, 613566756, 0, 14720, 613566756, 0, 14720, 613566756, 0, 14720, 613566756, 0, 14720, 613566756, 0, 14720, 613566756, 0, 14720, 613566756, 0, 14720, 613566756, 0, 15360, 17, 0, 15360, 17, 0, 25920, 838860, 0, 25920, 838860, 0, 25920, 838860, 0, 25920, 838860, 0, 25920, 838860, 0, 25920, 838860, 0, 25920, 838860, 0, 25920, 838860, 0, 25920, 838860, 0, 25920, 838860, 0, 1680, 136348168, 0, 1680, 136348168, 0, 1680, 136348168, 0, 1680, 136348168, 0, 1680, 136348168, 0, 1684, 136348168, 0, 1684, 136348168, 0, 1684, 136348168, 0, 1684, 136348168, 0, 1684, 136348168, 0, 1688, 136348168, 0, 1688, 136348168, 0, 1688, 136348168, 0, 1688, 136348168, 0, 1688, 136348168, 0, 1696, 136348168, 0, 1696, 136348168, 0, 1696, 136348168, 0, 1696, 136348168, 0, 1696, 136348168, 0, 1700, 136348168, 0, 1700, 136348168, 0, 1700, 136348168, 0, 1700, 136348168, 0, 1700, 136348168, 0, 1704, 136348168, 0, 1704, 136348168, 0, 1704, 136348168, 0, 1704, 136348168, 0, 1704, 136348168, 0, 1712, 136348168, 0, 1712, 136348168, 0, 1712, 136348168, 0, 1712, 136348168, 0, 1712, 136348168, 0, 1716, 136348168, 0, 1716, 136348168, 0, 1716, 136348168, 0, 1716, 136348168, 0, 1716, 136348168, 0, 1720, 136348168, 0, 1720, 136348168, 0, 1720, 136348168, 0, 1720, 136348168, 0, 1720, 136348168, 0, 5760, 1363481681, 0, 
5760, 1363481681, 0, 5760, 1363481681, 0, 5760, 1363481681, 0, 5760, 1363481681, 0, 5760, 1363481681, 0, 5760, 1363481681, 0, 5760, 1363481681, 0, 5760, 1363481681, 0, 5760, 1363481681, 0, 5760, 1363481681, 0, 6400, 17, 0, 6400, 17, 0, 7296, 1078199360, 0, 7296, 1078199360, 0, 7296, 1078199360, 0, 7296, 1078199360, 0, 7296, 1078199360, 0, 13712, 32768, 0, 13728, 32768, 0, 13744, 32768, 0, 14416, 2315255810, 0, 14416, 2315255810, 0, 14416, 2315255810, 0, 14416, 2315255810, 0, 14432, 2315255810, 0, 14432, 2315255810, 0, 14432, 2315255810, 0, 14432, 2315255810, 0, 14448, 2315255810, 0, 14448, 2315255810, 0, 14448, 2315255810, 0, 14448, 2315255810, 0, 14720, 613566756, 0, 14720, 613566756, 0, 14720, 613566756, 0, 14720, 613566756, 0, 14720, 613566756, 0, 14720, 613566756, 0, 14720, 613566756, 0, 14720, 613566756, 0, 14720, 613566756, 0, 14720, 613566756, 0, 15360, 17, 0, 15360, 17, 0, 25920, 838860, 0, 25920, 838860, 0, 25920, 838860, 0, 25920, 838860, 0, 25920, 838860, 0, 25920, 838860, 0, 25920, 838860, 0, 25920, 838860, 0, 25920, 838860, 0, 25920, 838860, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756414361469484584_939_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756414361469484584_939_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e6d0f736 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756414361469484584_939_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,167 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 26)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 204 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3520, 136315392, 0, 3520, 136315392, 0, 3520, 136315392, 0, 3536, 136315392, 0, 3536, 136315392, 0, 3536, 136315392, 0, 3552, 136315392, 0, 3552, 136315392, 0, 3552, 136315392, 0, 4224, 136315392, 0, 4224, 136315392, 0, 4224, 136315392, 0, 4240, 136315392, 0, 4240, 136315392, 0, 4240, 136315392, 0, 4256, 136315392, 0, 4256, 136315392, 0, 4256, 136315392, 0, 4800, 1207959552, 0, 4800, 1207959552, 0, 4816, 1207959552, 0, 4816, 1207959552, 0, 4832, 1207959552, 0, 4832, 1207959552, 0, 5376, 272696336, 0, 5376, 272696336, 0, 5376, 272696336, 0, 5376, 272696336, 0, 5376, 272696336, 0, 6528, 603979776, 0, 6528, 603979776, 0, 8064, 612368384, 0, 8064, 612368384, 0, 8064, 612368384, 0, 3520, 136315392, 0, 3520, 136315392, 0, 3520, 136315392, 0, 3536, 136315392, 0, 3536, 136315392, 0, 3536, 136315392, 0, 3552, 136315392, 0, 3552, 136315392, 0, 3552, 136315392, 0, 4224, 136315392, 0, 4224, 136315392, 0, 4224, 136315392, 0, 4240, 136315392, 0, 4240, 136315392, 0, 4240, 136315392, 0, 4256, 136315392, 0, 4256, 136315392, 0, 4256, 136315392, 0, 4800, 1207959552, 0, 4800, 1207959552, 0, 4816, 1207959552, 0, 4816, 1207959552, 0, 4832, 1207959552, 0, 4832, 1207959552, 0, 5376, 272696336, 0, 5376, 272696336, 0, 5376, 272696336, 0, 5376, 272696336, 0, 5376, 272696336, 0, 6528, 603979776, 0, 6528, 603979776, 0, 8064, 612368384, 0, 8064, 612368384, 0, 8064, 612368384, 0] + - Name: _wave_op_index + Format: UInt32 + 
Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756414364557637749_940_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756414364557637749_940_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5b1c603f --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756414364557637749_940_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,255 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 9))) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2688, 2454192128, 0, 2688, 2454192128, 0, 2688, 2454192128, 0, 2688, 2454192128, 0, 2688, 2454192128, 0, 3328, 16, 0, 4224, 4195328, 0, 4224, 4195328, 0, 4672, 524416, 0, 4672, 524416, 0, 5120, 146, 0, 5120, 146, 0, 5120, 146, 0, 5440, 613566756, 0, 5440, 613566756, 0, 5440, 613566756, 0, 5440, 613566756, 0, 5440, 613566756, 0, 5440, 613566756, 0, 5440, 613566756, 0, 5440, 613566756, 0, 5440, 613566756, 0, 5440, 613566756, 0, 8448, 64, 0, 11328, 
64, 0, 11904, 272696336, 0, 11904, 272696336, 0, 11904, 272696336, 0, 11904, 272696336, 0, 11904, 272696336, 0, 12224, 613566756, 0, 12224, 613566756, 0, 12224, 613566756, 0, 12224, 613566756, 0, 12224, 613566756, 0, 12224, 613566756, 0, 12224, 613566756, 0, 12224, 613566756, 0, 12224, 613566756, 0, 12224, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 2688, 2454192128, 0, 2688, 2454192128, 0, 2688, 2454192128, 0, 2688, 2454192128, 0, 2688, 2454192128, 0, 3328, 16, 0, 4224, 4195328, 0, 4224, 4195328, 0, 4672, 524416, 0, 4672, 524416, 0, 5120, 146, 0, 5120, 146, 0, 5120, 146, 0, 5440, 613566756, 0, 5440, 613566756, 0, 5440, 613566756, 0, 5440, 613566756, 0, 5440, 613566756, 0, 5440, 613566756, 0, 5440, 613566756, 0, 5440, 613566756, 0, 5440, 613566756, 0, 5440, 613566756, 0, 8448, 64, 0, 11328, 64, 0, 11904, 272696336, 0, 11904, 272696336, 0, 11904, 272696336, 0, 11904, 272696336, 0, 11904, 272696336, 0, 12224, 613566756, 0, 12224, 613566756, 0, 12224, 613566756, 0, 12224, 613566756, 0, 12224, 613566756, 0, 12224, 613566756, 0, 12224, 613566756, 0, 12224, 613566756, 0, 12224, 613566756, 0, 12224, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756414367832536086_941_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756414367832536086_941_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3aa366bc --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756414367832536086_941_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,249 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 27))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 28))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 28)) { + if ((WaveGetLaneIndex() == 18)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((217 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [9600, 538066944, 0, 9600, 538066944, 0, 9600, 538066944, 0, 9600, 538066944, 0, 10240, 85, 0, 10240, 85, 0, 10240, 85, 0, 10240, 85, 0, 12864, 526464, 0, 12864, 526464, 0, 12864, 526464, 0, 13904, 34, 0, 
13904, 34, 0, 13920, 34, 0, 13920, 34, 0, 15104, 134225920, 0, 15104, 134225920, 0, 16576, 2164785409, 0, 16576, 2164785409, 0, 16576, 2164785409, 0, 16576, 2164785409, 0, 16576, 2164785409, 0, 9600, 538066944, 0, 9600, 538066944, 0, 9600, 538066944, 0, 9600, 538066944, 0, 10240, 85, 0, 10240, 85, 0, 10240, 85, 0, 10240, 85, 0, 12864, 526464, 0, 12864, 526464, 0, 12864, 526464, 0, 13904, 34, 0, 13904, 34, 0, 13920, 34, 0, 13920, 34, 0, 15104, 134225920, 0, 15104, 134225920, 0, 16576, 2164785409, 0, 16576, 2164785409, 0, 16576, 2164785409, 0, 16576, 2164785409, 0, 16576, 2164785409, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756414374250393832_943_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756414374250393832_943_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5edf680b --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756414374250393832_943_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,409 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 23)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 29))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 12)) || 
(WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 30))) { + if ((WaveGetLaneIndex() >= 27)) { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if 
((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 25))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((306 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 21)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 18))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (343 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || 
(WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (362 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((376 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((383 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (388 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30))) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (422 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint 
counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((436 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 6912, 1073741828, 0, 6912, 1073741828, 0, 8832, 1073741828, 0, 8832, 1073741828, 0, 10240, 559240, 0, 10240, 559240, 0, 10240, 559240, 0, 10240, 559240, 0, 10240, 559240, 0, 15488, 2, 0, 20224, 2097152, 0, 24528, 1024, 0, 24544, 1024, 0, 24832, 67125252, 0, 24832, 67125252, 0, 24832, 67125252, 0, 27920, 8, 0, 27936, 8, 0, 576, 17, 0, 576, 17, 0, 6912, 1073741828, 0, 6912, 1073741828, 0, 8832, 1073741828, 0, 8832, 1073741828, 0, 10240, 559240, 0, 10240, 559240, 0, 10240, 559240, 0, 10240, 559240, 0, 10240, 559240, 0, 15488, 2, 0, 20224, 2097152, 0, 24528, 1024, 0, 24544, 1024, 0, 24832, 67125252, 0, 24832, 67125252, 0, 24832, 67125252, 0, 27920, 8, 0, 27936, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756414531541458987_946_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756414531541458987_946_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..82a201e0 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756414531541458987_946_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,371 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 6))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 26))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 
1); + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((212 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 21))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((233 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((243 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((252 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((256 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((i2 == 2)) { + break; + } + } + if 
(((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((278 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (295 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (304 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((318 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) 
|| (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((360 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((375 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((382 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((389 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 846 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 64, 0, 11712, 32768, 0, 12160, 2, 0, 13584, 2359328, 0, 13584, 2359328, 
0, 13584, 2359328, 0, 13600, 2359328, 0, 13600, 2359328, 0, 13600, 2359328, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 15568, 21, 0, 15568, 21, 0, 15568, 21, 0, 15572, 21, 0, 15572, 21, 0, 
15572, 21, 0, 15576, 21, 0, 15576, 21, 0, 15576, 21, 0, 15584, 21, 0, 15584, 21, 0, 15584, 21, 0, 15588, 21, 0, 15588, 21, 0, 15588, 21, 0, 15592, 21, 0, 15592, 21, 0, 15592, 21, 0, 17808, 67379200, 0, 17808, 67379200, 0, 17808, 67379200, 0, 17824, 67379200, 0, 17824, 67379200, 0, 17824, 67379200, 0, 18240, 8, 0, 18880, 73, 0, 18880, 73, 0, 18880, 73, 0, 19456, 272696336, 0, 19456, 272696336, 0, 19456, 272696336, 0, 19456, 272696336, 0, 19456, 272696336, 0, 20368, 292, 0, 20368, 292, 0, 20368, 292, 0, 20384, 292, 0, 20384, 292, 0, 20384, 292, 0, 20400, 292, 0, 20400, 292, 0, 20400, 292, 0, 24464, 4, 0, 24468, 4, 0, 24480, 4, 0, 24484, 4, 0, 24496, 4, 0, 24500, 4, 0, 1792, 64, 0, 11712, 32768, 0, 12160, 2, 0, 13584, 2359328, 0, 13584, 2359328, 0, 13584, 2359328, 0, 13600, 2359328, 0, 13600, 2359328, 0, 13600, 2359328, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14928, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14932, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14936, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 
14944, 4259315727, 0, 14944, 4259315727, 0, 14944, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14948, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 14952, 4259315727, 0, 15568, 21, 0, 15568, 21, 0, 15568, 21, 0, 15572, 21, 0, 15572, 21, 0, 15572, 21, 0, 15576, 21, 0, 15576, 21, 0, 15576, 21, 0, 15584, 21, 0, 15584, 21, 0, 15584, 21, 0, 15588, 21, 0, 15588, 21, 0, 15588, 21, 0, 15592, 21, 0, 15592, 21, 0, 15592, 21, 0, 17808, 67379200, 0, 17808, 67379200, 0, 17808, 67379200, 0, 17824, 67379200, 0, 17824, 67379200, 0, 17824, 67379200, 0, 18240, 8, 0, 18880, 73, 0, 18880, 73, 0, 18880, 73, 0, 19456, 272696336, 0, 19456, 272696336, 0, 19456, 272696336, 0, 19456, 272696336, 0, 19456, 272696336, 0, 20368, 292, 0, 20368, 292, 0, 20368, 292, 0, 20384, 292, 0, 20384, 292, 0, 20384, 292, 0, 20400, 292, 0, 20400, 292, 0, 20400, 292, 0, 24464, 4, 0, 24468, 4, 0, 24480, 4, 0, 24484, 4, 0, 24496, 4, 0, 24500, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756415244126871505_948_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756415244126871505_948_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fa89a2f9 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756415244126871505_948_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,300 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 14)) { + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 29)) { + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 23))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 29)) { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) 
|| (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } 
+ default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3328, 67174912, 0, 3328, 67174912, 0, 3328, 67174912, 0, 3072, 4160749568, 0, 3072, 4160749568, 0, 3072, 4160749568, 0, 3072, 4160749568, 0, 3072, 4160749568, 0, 2816, 2097152, 0, 2432, 2048, 0, 2048, 8388608, 0, 5328, 1073741825, 0, 5328, 1073741825, 0, 5344, 1073741825, 0, 5344, 1073741825, 0, 5888, 272696336, 0, 5888, 272696336, 0, 5888, 272696336, 0, 5888, 272696336, 0, 5888, 272696336, 0, 6528, 4, 0, 8704, 73, 0, 8704, 73, 0, 8704, 73, 0, 12736, 268435456, 0, 13632, 613566756, 0, 13632, 613566756, 0, 13632, 613566756, 0, 13632, 613566756, 0, 13632, 613566756, 0, 13632, 613566756, 0, 13632, 613566756, 0, 13632, 613566756, 0, 13632, 613566756, 0, 13632, 613566756, 0, 3328, 67174912, 0, 3328, 67174912, 0, 3328, 67174912, 0, 3072, 4160749568, 0, 3072, 4160749568, 0, 3072, 4160749568, 0, 3072, 4160749568, 0, 3072, 4160749568, 0, 2816, 2097152, 0, 2432, 2048, 0, 2048, 8388608, 0, 5328, 1073741825, 0, 5328, 1073741825, 0, 5344, 1073741825, 0, 5344, 1073741825, 0, 5888, 272696336, 0, 5888, 272696336, 0, 5888, 272696336, 0, 5888, 272696336, 0, 5888, 272696336, 0, 6528, 4, 0, 8704, 73, 0, 8704, 73, 0, 8704, 73, 0, 12736, 268435456, 0, 13632, 613566756, 0, 13632, 613566756, 0, 13632, 613566756, 0, 13632, 613566756, 0, 13632, 613566756, 0, 13632, 613566756, 0, 13632, 613566756, 0, 13632, 613566756, 0, 13632, 613566756, 0, 13632, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 
+ Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756415301103295207_949_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756415301103295207_949_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a52c7ff7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756415301103295207_949_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,336 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if 
(((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 
31))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2496, 1, 0, 3136, 1, 0, 3712, 1, 0, 4032, 1, 0, 4992, 1, 0, 7872, 1145324612, 0, 7872, 1145324612, 0, 7872, 1145324612, 0, 7872, 1145324612, 0, 7872, 1145324612, 0, 7872, 1145324612, 0, 7872, 1145324612, 0, 7872, 1145324612, 0, 8512, 8, 0, 9408, 8390656, 0, 9408, 8390656, 0, 12496, 16777216, 0, 12512, 16777216, 0, 12944, 64, 0, 
12960, 64, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2496, 1, 0, 3136, 1, 0, 3712, 1, 0, 4032, 1, 0, 4992, 1, 0, 7872, 1145324612, 0, 7872, 1145324612, 0, 7872, 1145324612, 0, 7872, 1145324612, 0, 7872, 1145324612, 0, 7872, 1145324612, 0, 7872, 1145324612, 0, 7872, 1145324612, 0, 8512, 8, 0, 9408, 8390656, 0, 9408, 8390656, 0, 12496, 16777216, 0, 12512, 16777216, 0, 12944, 64, 0, 12960, 64, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756415344721946871_950_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756415344721946871_950_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..db15f5be --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756415344721946871_950_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,68 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1024, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0, 1040, 2863311530, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756415348339861136_951_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756415348339861136_951_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5cd45b76 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756415348339861136_951_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,140 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 24))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 24))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 23))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5376, 16, 0, 5376, 16, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756415470432329799_953_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756415470432329799_953_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d6951b65 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756415470432329799_953_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,176 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 16)) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if ((i0 == 2)) { + break; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 27))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 3904, 8388608, 0, 3908, 8388608, 0, 3920, 8388608, 0, 3924, 8388608, 0, 3936, 8388608, 0, 3940, 8388608, 0, 6992, 134742024, 0, 6992, 134742024, 0, 6992, 134742024, 0, 7008, 134742024, 0, 7008, 134742024, 0, 7008, 134742024, 0, 7024, 134742024, 0, 7024, 134742024, 0, 7024, 134742024, 0, 9536, 524288, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 1152, 1363481681, 0, 3904, 8388608, 0, 3908, 8388608, 0, 3920, 8388608, 0, 3924, 8388608, 0, 3936, 8388608, 0, 
3940, 8388608, 0, 6992, 134742024, 0, 6992, 134742024, 0, 6992, 134742024, 0, 7008, 134742024, 0, 7008, 134742024, 0, 7008, 134742024, 0, 7024, 134742024, 0, 7024, 134742024, 0, 7024, 134742024, 0, 9536, 524288, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756415499260058043_954_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756415499260058043_954_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ab76d08d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756415499260058043_954_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,211 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 27))) { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + uint counter1 = 
0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 31))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2880, 1024, 0, 2896, 1024, 0, 2912, 1024, 0, 4352, 559240, 0, 4352, 559240, 0, 4352, 559240, 0, 4352, 559240, 0, 4352, 559240, 0, 6016, 134250496, 0, 6016, 134250496, 0, 11008, 85, 0, 11008, 85, 0, 11008, 85, 0, 11008, 85, 0, 576, 17, 0, 576, 17, 0, 2880, 1024, 0, 2896, 1024, 0, 2912, 1024, 0, 4352, 559240, 0, 4352, 559240, 0, 4352, 559240, 0, 4352, 559240, 0, 4352, 559240, 0, 6016, 134250496, 0, 6016, 134250496, 0, 11008, 85, 0, 11008, 85, 0, 11008, 85, 0, 11008, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756415644665942421_956_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756415644665942421_956_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..659150d7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756415644665942421_956_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,303 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 25)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() 
& 1) == 1)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 30))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch 
((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [16896, 85, 0, 16896, 85, 0, 16896, 85, 0, 16896, 85, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 16896, 85, 0, 16896, 85, 0, 16896, 85, 0, 16896, 85, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0, 17472, 1431655765, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756415692578897308_958_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756415692578897308_958_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1eb2887a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756415692578897308_958_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* output log: one 3-uint record {site tag, ballot.x, ballot.y} per participating lane */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element [0] is the shared write cursor into _participant_bit */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Each lane that executes a wave op appends a record naming the site and the ballot of lanes active with it. */ + uint result = 0; + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* reserves 3 slots; temp receives the pre-add cursor, i.e. the start of this lane's record */ + _participant_bit[temp] = (21 << 6); /* site id 21 stored from bit 6 upward */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { /* remaining odd lanes */ + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 536870944, 0, 1344, 536870944, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1344, 536870944, 0, 1344, 536870944, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0, 1088, 2326440586, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756415693873157762_959_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756415693873157762_959_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..07877308 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756415693873157762_959_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,132 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* output log: one 3-uint record {site tag, ballot.x, ballot.y} per participating lane */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element [0] is the shared write cursor into _participant_bit */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Each lane that executes a wave op appends a record naming the site and the ballot of lanes active with it. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* reserves 3 slots; temp receives the pre-add cursor, i.e. the start of this lane's record */ + _participant_bit[temp] = (9 << 6); /* site id stored from bit 6 upward */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { /* never true here: lanes in case 1 are odd */ + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); /* incremented before use, so the tags below carry 1..3 */ + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); /* loop counter placed at bit 4, below the site id */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || 
(WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { /* not reachable: lane % 4 is always 0..3 and each value has its own case */ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 3280, 16384, 0, 3296, 16384, 0, 3312, 16384, 0, 4160, 2290649224, 0, 4160, 2290649224, 0, 4160, 2290649224, 0, 4160, 2290649224, 0, 4160, 2290649224, 0, 4160, 2290649224, 0, 4160, 2290649224, 0, 4160, 2290649224, 0, 576, 17, 0, 576, 17, 0, 3280, 16384, 0, 3296, 16384, 0, 3312, 16384, 0, 4160, 2290649224, 0, 4160, 2290649224, 0, 4160, 2290649224, 0, 4160, 2290649224, 0, 4160, 2290649224, 0, 4160, 2290649224, 0, 4160, 2290649224, 0, 4160, 2290649224, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756415699299779838_960_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756415699299779838_960_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b592d860 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756415699299779838_960_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,302 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* output log: one 3-uint record {site tag, ballot.x, ballot.y} per participating lane */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element [0] is the shared write cursor into _participant_bit */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Each lane that executes a wave op appends a record naming the site and the ballot of lanes active with it. */ + uint result = 0; + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* reserves 3 slots; temp receives the pre-add cursor, i.e. the start of this lane's record */ + _participant_bit[temp] = (17 << 6); /* site id stored from bit 6 upward */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } /* no break: case 0 continues into case 1; the expected data contains records from case-1 sites for lanes with lane % 3 == 0 */ + case 1: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 30))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i0 << 4)); /* loop index placed at bit 4, below the site id */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { /* contradicts the enclosing even-lane test, so never true */ + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 28))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 18))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 26))) { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { /* not reachable: lane % 3 is always 0..2 and each value has its own case */ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 3221225472, 0, 1088, 3221225472, 0, 704, 65536, 0, 1728, 73, 0, 1728, 73, 0, 1728, 73, 0, 3904, 4096, 0, 9024, 289735761, 0, 9024, 289735761, 0, 9024, 289735761, 0, 9024, 289735761, 0, 9024, 
289735761, 0, 9024, 289735761, 0, 9024, 289735761, 0, 9024, 289735761, 0, 9024, 289735761, 0, 11072, 1, 0, 12480, 2317910658, 0, 12480, 2317910658, 0, 12480, 2317910658, 0, 12480, 2317910658, 0, 12480, 2317910658, 0, 12480, 2317910658, 0, 12480, 2317910658, 0, 12480, 2317910658, 0, 12480, 2317910658, 0, 12480, 8192, 0, 1088, 3221225472, 0, 1088, 3221225472, 0, 704, 65536, 0, 1728, 73, 0, 1728, 73, 0, 1728, 73, 0, 3904, 4096, 0, 9024, 289735761, 0, 9024, 289735761, 0, 9024, 289735761, 0, 9024, 289735761, 0, 9024, 289735761, 0, 9024, 289735761, 0, 9024, 289735761, 0, 9024, 289735761, 0, 9024, 289735761, 0, 11072, 1, 0, 12480, 2317910658, 0, 12480, 2317910658, 0, 12480, 2317910658, 0, 12480, 2317910658, 0, 12480, 2317910658, 0, 12480, 2317910658, 0, 12480, 2317910658, 0, 12480, 2317910658, 0, 12480, 2317910658, 0, 12480, 8192, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756416358780626240_963_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756416358780626240_963_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ec09481d --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756416358780626240_963_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,82 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* output log: one 3-uint record {site tag, ballot.x, ballot.y} per participating lane */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element [0] is the shared write cursor into _participant_bit */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Each lane that executes a wave op appends a record naming the site and the ballot of lanes active with it. */ + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); /* incremented before use, so the tags below carry counter value 1 */ + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* reserves 3 slots; temp receives the pre-add cursor, i.e. the start of this lane's record */ + _participant_bit[temp] = ((17 << 6) | (counter0 << 4)); /* site id from bit 6 upward, loop counter at bit 4 */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { /* exits at the end of the first pass, so the loop body runs once despite the bound of 2 */ + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 
4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1808, 3221225503, 0, 1808, 3221225503, 0, 1808, 3221225503, 0, 1808, 3221225503, 0, 1808, 3221225503, 0, 1808, 3221225503, 0, 1808, 3221225503, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1104, 4286579199, 0, 1808, 3221225503, 0, 1808, 3221225503, 0, 1808, 3221225503, 0, 1808, 3221225503, 0, 1808, 3221225503, 0, 1808, 3221225503, 0, 1808, 3221225503, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756416359483321641_964_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756416359483321641_964_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9cc0d71a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756416359483321641_964_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,128 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* output log: one 3-uint record {site tag, ballot.x, ballot.y} per participating lane */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element [0] is the shared write cursor into _participant_bit */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Each lane that executes a wave op appends a record naming the site and the ballot of lanes active with it. */ + uint result = 0; + if ((WaveGetLaneIndex() >= 23)) { + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* reserves 3 slots; temp receives the pre-add cursor, i.e. the start of this lane's record */ + _participant_bit[temp] = (9 << 6); /* site id stored from bit 6 upward */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { /* never true inside the >= 23 branch, so sites 19 and 29 log nothing */ + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 25)) { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 8)) { /* also never true inside the >= 23 branch */ + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 3221225472, 0, 576, 3221225472, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4288, 112, 0, 4288, 112, 0, 4288, 112, 0, 3904, 128, 0, 576, 3221225472, 0, 576, 3221225472, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 
4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4544, 4290773007, 0, 4288, 112, 0, 4288, 112, 0, 4288, 112, 0, 3904, 128, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756416360126355595_965_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756416360126355595_965_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9673d605 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756416360126355595_965_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,261 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* output log: one 3-uint record {site tag, ballot.x, ballot.y} per participating lane */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element [0] is the shared write cursor into _participant_bit */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Each lane that executes a wave op appends a record naming the site and the ballot of lanes active with it. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); /* incremented before use, so the tags below carry 1..3 */ + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* reserves 3 slots; temp receives the pre-add cursor, i.e. the start of this lane's record */ + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); /* site id from bit 6 upward, outer loop counter at bit 4 */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 7))) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (counter0 << 4)) | (i1 << 2)); /* nested loop index placed at bit 2 */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 17)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((107 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((116 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((125 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 6))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 20)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { /* not reachable: lane % 4 is always 0..3 and each value has its own case */ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 1, 0, 1312, 1, 0, 1328, 1, 0, 4624, 268435457, 0, 4624, 268435457, 0, 4640, 268435457, 0, 4640, 268435457, 0, 4656, 268435457, 0, 4656, 268435457, 0, 5696, 572653568, 0, 5696, 572653568, 0, 5696, 572653568, 0, 5696, 572653568, 0, 5712, 572653568, 0, 5712, 572653568, 0, 5712, 572653568, 0, 5712, 572653568, 0, 8320, 1145324612, 0, 8320, 1145324612, 0, 8320, 1145324612, 0, 8320, 1145324612, 0, 8320, 1145324612, 0, 8320, 1145324612, 0, 8320, 1145324612, 0, 8320, 1145324612, 0, 8768, 559240, 0, 8768, 559240, 0, 8768, 559240, 0, 8768, 559240, 0, 8768, 559240, 0, 1296, 1, 0, 1312, 1, 0, 1328, 1, 0, 4624, 268435457, 0, 4624, 268435457, 0, 4640, 268435457, 0, 4640, 268435457, 0, 4656, 268435457, 0, 4656, 268435457, 0, 5696, 572653568, 0, 5696, 572653568, 0, 5696, 572653568, 0, 5696, 572653568, 0, 5712, 572653568, 0, 5712, 572653568, 0, 5712, 572653568, 0, 5712, 572653568, 0, 8320, 1145324612, 0, 8320, 1145324612, 0, 8320, 1145324612, 0, 8320, 1145324612, 0, 8320, 1145324612, 0, 8320, 1145324612, 0, 8320, 1145324612, 0, 8320, 1145324612, 0, 8768, 559240, 0, 8768, 559240, 0, 8768, 559240, 0, 8768, 559240, 0, 8768, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756416376418326326_966_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756416376418326326_966_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5c3e72d7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756416376418326326_966_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,434 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 1))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + 
WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 30)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 
20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((143 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 
+ 1)) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 13))) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(((255 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((277 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 28))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((341 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 31))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((371 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 27))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((397 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((408 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 23))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((426 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((433 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((448 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 312 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5952, 1145324612, 0, 5952, 1145324612, 0, 5952, 1145324612, 0, 5952, 1145324612, 0, 5952, 1145324612, 0, 5952, 1145324612, 0, 5952, 1145324612, 0, 5952, 1145324612, 0, 6400, 559240, 0, 6400, 559240, 0, 6400, 559240, 0, 6400, 559240, 0, 6400, 559240, 0, 7040, 17, 0, 7040, 17, 0, 10176, 8, 0, 12224, 8390656, 0, 12224, 8390656, 0, 16340, 4096, 0, 16356, 4096, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 21824, 8388608, 0, 21840, 8388608, 0, 21856, 8388608, 0, 27264, 134217728, 0, 27280, 134217728, 0, 27296, 134217728, 0, 27712, 34952, 0, 27712, 34952, 0, 27712, 34952, 0, 27712, 34952, 0, 27728, 34952, 0, 27728, 34952, 0, 27728, 34952, 0, 27728, 34952, 0, 27744, 34952, 0, 27744, 34952, 0, 27744, 34952, 0, 27744, 34952, 0, 28672, 
8388616, 0, 28672, 8388616, 0, 28688, 8388616, 0, 28688, 8388616, 0, 28704, 8388616, 0, 28704, 8388616, 0, 5952, 1145324612, 0, 5952, 1145324612, 0, 5952, 1145324612, 0, 5952, 1145324612, 0, 5952, 1145324612, 0, 5952, 1145324612, 0, 5952, 1145324612, 0, 5952, 1145324612, 0, 6400, 559240, 0, 6400, 559240, 0, 6400, 559240, 0, 6400, 559240, 0, 6400, 559240, 0, 7040, 17, 0, 7040, 17, 0, 10176, 8, 0, 12224, 8390656, 0, 12224, 8390656, 0, 16340, 4096, 0, 16356, 4096, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 20096, 1145324612, 0, 21824, 8388608, 0, 21840, 8388608, 0, 21856, 8388608, 0, 27264, 134217728, 0, 27280, 134217728, 0, 27296, 134217728, 0, 27712, 34952, 0, 27712, 34952, 0, 27712, 34952, 0, 27712, 34952, 0, 27728, 34952, 0, 27728, 34952, 0, 27728, 34952, 0, 27728, 34952, 0, 27744, 34952, 0, 27744, 34952, 0, 27744, 34952, 0, 27744, 34952, 0, 28672, 8388616, 0, 28672, 8388616, 0, 28688, 8388616, 0, 28688, 8388616, 0, 28704, 8388616, 0, 28704, 8388616, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756416428558030069_967_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756416428558030069_967_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ca699d2a --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756416428558030069_967_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,249 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 21)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 24))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 20))) { + result 
= (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 22))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 2)) { + break; + } + } + 
break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 26))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 2097152, 0, 6080, 2097152, 0, 12864, 16640, 0, 12864, 16640, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14016, 2818572290, 0, 14016, 2818572290, 0, 14016, 2818572290, 0, 14016, 2818572290, 0, 1024, 2097152, 0, 6080, 2097152, 0, 12864, 16640, 0, 12864, 16640, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 
1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14400, 1431655765, 0, 14016, 2818572290, 0, 14016, 2818572290, 0, 14016, 2818572290, 0, 14016, 2818572290, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756417538028567074_969_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756417538028567074_969_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c0b2f679 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756417538028567074_969_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,105 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: 
{ + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756417625009836038_971_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756417625009836038_971_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..51b4e9ad --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756417625009836038_971_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,128 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 
1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2240, 272696336, 0, 2240, 272696336, 0, 2240, 272696336, 0, 2240, 272696336, 0, 2240, 272696336, 0, 4736, 613566756, 0, 4736, 613566756, 0, 4736, 613566756, 0, 4736, 613566756, 0, 4736, 613566756, 0, 4736, 613566756, 0, 4736, 613566756, 0, 4736, 613566756, 0, 4736, 613566756, 0, 4736, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 2240, 272696336, 0, 2240, 272696336, 0, 2240, 272696336, 0, 2240, 272696336, 0, 2240, 272696336, 0, 4736, 613566756, 
0, 4736, 613566756, 0, 4736, 613566756, 0, 4736, 613566756, 0, 4736, 613566756, 0, 4736, 613566756, 0, 4736, 613566756, 0, 4736, 613566756, 0, 4736, 613566756, 0, 4736, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756417625555240743_972_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756417625555240743_972_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..185544d9 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756417625555240743_972_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,255 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 21))) { + if 
(((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 30)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 28)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + 
counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ if ((i2 == 1)) { + continue; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 26))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 8))) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2688, 2097152, 0, 5120, 32, 0, 6224, 1073741824, 0, 7296, 33562626, 0, 7296, 33562626, 0, 7296, 33562626, 0, 7616, 604127268, 0, 7616, 604127268, 0, 7616, 604127268, 0, 7616, 604127268, 0, 7616, 604127268, 0, 7616, 604127268, 0, 9280, 
134480384, 0, 9280, 134480384, 0, 9280, 134480384, 0, 9296, 134480384, 0, 9296, 134480384, 0, 9296, 134480384, 0, 11584, 2147483648, 0, 576, 17, 0, 576, 17, 0, 2688, 2097152, 0, 5120, 32, 0, 6224, 1073741824, 0, 7296, 33562626, 0, 7296, 33562626, 0, 7296, 33562626, 0, 7616, 604127268, 0, 7616, 604127268, 0, 7616, 604127268, 0, 7616, 604127268, 0, 7616, 604127268, 0, 7616, 604127268, 0, 9280, 134480384, 0, 9280, 134480384, 0, 9280, 134480384, 0, 9296, 134480384, 0, 9296, 134480384, 0, 9296, 134480384, 0, 11584, 2147483648, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756417662475204882_974_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756417662475204882_974_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..66b0d410 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756417662475204882_974_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,134 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1792, 272696336, 0, 1792, 272696336, 0, 1792, 272696336, 0, 1792, 272696336, 0, 1792, 272696336, 0, 2432, 613566756, 0, 2432, 613566756, 0, 2432, 613566756, 0, 2432, 613566756, 0, 2432, 613566756, 0, 2432, 613566756, 0, 2432, 613566756, 0, 2432, 613566756, 0, 2432, 613566756, 0, 2432, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1792, 272696336, 0, 1792, 272696336, 0, 1792, 272696336, 0, 1792, 272696336, 0, 1792, 272696336, 0, 2432, 613566756, 0, 2432, 613566756, 0, 2432, 613566756, 0, 2432, 613566756, 0, 2432, 613566756, 0, 2432, 613566756, 0, 2432, 613566756, 0, 2432, 613566756, 0, 2432, 613566756, 0, 2432, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - 
Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756417662847998456_975_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756417662847998456_975_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..971c32ed --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756417662847998456_975_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1920, 511, 0, 1920, 511, 0, 1920, 511, 0, 1920, 511, 0, 1920, 511, 0, 1920, 511, 0, 1920, 511, 0, 1920, 511, 0, 1920, 511, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1408, 16778240, 0, 1408, 16778240, 0, 1920, 511, 0, 1920, 511, 0, 1920, 511, 0, 1920, 511, 0, 1920, 511, 0, 1920, 511, 0, 1920, 511, 0, 1920, 511, 0, 1920, 511, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1664, 2863311360, 0, 1408, 16778240, 0, 1408, 16778240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756417664259292517_977_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756417664259292517_977_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9d85349c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756417664259292517_977_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,161 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 30)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2816, 1145324612, 0, 2816, 1145324612, 0, 2816, 1145324612, 0, 2816, 1145324612, 0, 2816, 1145324612, 0, 
2816, 1145324612, 0, 2816, 1145324612, 0, 2816, 1145324612, 0, 3264, 559240, 0, 3264, 559240, 0, 3264, 559240, 0, 3264, 559240, 0, 3264, 559240, 0, 3904, 73, 0, 3904, 73, 0, 3904, 73, 0, 4480, 272696336, 0, 4480, 272696336, 0, 4480, 272696336, 0, 4480, 272696336, 0, 4480, 272696336, 0, 4800, 613566756, 0, 4800, 613566756, 0, 4800, 613566756, 0, 4800, 613566756, 0, 4800, 613566756, 0, 4800, 613566756, 0, 4800, 613566756, 0, 4800, 613566756, 0, 4800, 613566756, 0, 4800, 613566756, 0, 576, 17, 0, 576, 17, 0, 2816, 1145324612, 0, 2816, 1145324612, 0, 2816, 1145324612, 0, 2816, 1145324612, 0, 2816, 1145324612, 0, 2816, 1145324612, 0, 2816, 1145324612, 0, 2816, 1145324612, 0, 3264, 559240, 0, 3264, 559240, 0, 3264, 559240, 0, 3264, 559240, 0, 3264, 559240, 0, 3904, 73, 0, 3904, 73, 0, 3904, 73, 0, 4480, 272696336, 0, 4480, 272696336, 0, 4480, 272696336, 0, 4480, 272696336, 0, 4480, 272696336, 0, 4800, 613566756, 0, 4800, 613566756, 0, 4800, 613566756, 0, 4800, 613566756, 0, 4800, 613566756, 0, 4800, 613566756, 0, 4800, 613566756, 0, 4800, 613566756, 0, 4800, 613566756, 0, 4800, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756417862703357922_980_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756417862703357922_980_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6b57bc33 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756417862703357922_980_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,187 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 16))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 270 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1792, 2453667842, 0, 1792, 2453667842, 0, 1792, 2453667842, 0, 1792, 2453667842, 0, 1792, 2453667842, 0, 1808, 2453667842, 0, 1808, 2453667842, 0, 1808, 2453667842, 0, 1808, 2453667842, 0, 1808, 2453667842, 0, 4992, 2, 0, 5008, 2, 0, 5440, 524288, 0, 5456, 524288, 0, 5952, 613566756, 0, 5952, 613566756, 0, 5952, 613566756, 0, 5952, 613566756, 0, 5952, 613566756, 0, 5952, 613566756, 0, 5952, 613566756, 0, 5952, 613566756, 0, 5952, 613566756, 0, 5952, 613566756, 0, 6592, 73, 0, 6592, 73, 0, 6592, 73, 0, 7168, 272696336, 0, 7168, 272696336, 0, 7168, 272696336, 0, 7168, 272696336, 0, 7168, 272696336, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1792, 2453667842, 0, 1792, 2453667842, 0, 
1792, 2453667842, 0, 1792, 2453667842, 0, 1792, 2453667842, 0, 1808, 2453667842, 0, 1808, 2453667842, 0, 1808, 2453667842, 0, 1808, 2453667842, 0, 1808, 2453667842, 0, 4992, 2, 0, 5008, 2, 0, 5440, 524288, 0, 5456, 524288, 0, 5952, 613566756, 0, 5952, 613566756, 0, 5952, 613566756, 0, 5952, 613566756, 0, 5952, 613566756, 0, 5952, 613566756, 0, 5952, 613566756, 0, 5952, 613566756, 0, 5952, 613566756, 0, 5952, 613566756, 0, 6592, 73, 0, 6592, 73, 0, 6592, 73, 0, 7168, 272696336, 0, 7168, 272696336, 0, 7168, 272696336, 0, 7168, 272696336, 0, 7168, 272696336, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0, 7488, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756417865709783201_981_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756417865709783201_981_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..09bde0f7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756417865709783201_981_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,201 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 16)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3456, 2147483648, 0, 3072, 32768, 0, 2816, 536870944, 0, 2816, 536870944, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 4096, 17, 0, 4096, 17, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 5632, 8, 0, 6528, 8390656, 0, 6528, 8390656, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3456, 2147483648, 0, 3072, 32768, 0, 2816, 536870944, 0, 2816, 536870944, 0, 2432, 1610547200, 0, 2432, 
1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 2432, 1610547200, 0, 4096, 17, 0, 4096, 17, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 4992, 1145324612, 0, 5632, 8, 0, 6528, 8390656, 0, 6528, 8390656, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756417868629338173_982_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756417868629338173_982_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fe038fd3 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756417868629338173_982_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,286 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 31))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() >= 24)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 17))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 
1)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((272 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7232, 286331153, 0, 7232, 286331153, 0, 7232, 286331153, 0, 7232, 286331153, 0, 7232, 286331153, 0, 7232, 286331153, 0, 7232, 286331153, 0, 7232, 286331153, 0, 10112, 572662304, 0, 10112, 572662304, 0, 10112, 572662304, 0, 10112, 572662304, 0, 10112, 572662304, 0, 10112, 572662304, 0, 10112, 572662304, 0, 10752, 64, 0, 11904, 572662304, 0, 11904, 572662304, 0, 11904, 572662304, 0, 11904, 572662304, 0, 11904, 572662304, 0, 11904, 572662304, 0, 11904, 572662304, 0, 14272, 2290649224, 0, 14272, 2290649224, 0, 14272, 2290649224, 0, 14272, 2290649224, 0, 14272, 2290649224, 0, 14272, 2290649224, 0, 14272, 2290649224, 0, 14272, 2290649224, 0, 16448, 2048, 0, 16464, 2048, 0, 17408, 134217728, 0, 17412, 134217728, 0, 17424, 134217728, 0, 17428, 134217728, 0, 7232, 286331153, 0, 7232, 286331153, 0, 7232, 286331153, 0, 7232, 286331153, 0, 7232, 286331153, 0, 7232, 286331153, 0, 7232, 286331153, 0, 7232, 286331153, 0, 10112, 572662304, 0, 10112, 572662304, 0, 10112, 572662304, 0, 10112, 572662304, 0, 10112, 572662304, 0, 10112, 572662304, 0, 10112, 572662304, 0, 10752, 64, 0, 11904, 572662304, 0, 11904, 572662304, 0, 11904, 572662304, 0, 11904, 572662304, 0, 11904, 572662304, 0, 11904, 572662304, 0, 11904, 572662304, 0, 14272, 2290649224, 0, 14272, 2290649224, 0, 14272, 2290649224, 0, 14272, 2290649224, 0, 14272, 2290649224, 0, 14272, 2290649224, 
0, 14272, 2290649224, 0, 14272, 2290649224, 0, 16448, 2048, 0, 16464, 2048, 0, 17408, 134217728, 0, 17412, 134217728, 0, 17424, 134217728, 0, 17428, 134217728, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756417879870598965_983_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756417879870598965_983_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..42e3fde8 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756417879870598965_983_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,218 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 11))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } 
+ case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 6272, 68174084, 0, 6272, 68174084, 0, 6272, 68174084, 0, 6272, 68174084, 0, 6272, 68174084, 0, 7680, 65, 0, 7680, 65, 0, 9088, 272696336, 0, 9088, 272696336, 0, 9088, 272696336, 0, 9088, 272696336, 0, 9088, 272696336, 0, 9408, 613566756, 0, 9408, 613566756, 0, 9408, 613566756, 0, 9408, 613566756, 0, 9408, 613566756, 0, 9408, 613566756, 0, 9408, 613566756, 0, 9408, 613566756, 0, 9408, 613566756, 0, 9408, 613566756, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 6272, 68174084, 0, 6272, 68174084, 0, 6272, 68174084, 0, 6272, 68174084, 0, 6272, 68174084, 0, 7680, 65, 0, 7680, 65, 0, 9088, 272696336, 0, 9088, 272696336, 0, 9088, 272696336, 0, 9088, 272696336, 0, 9088, 272696336, 0, 9408, 613566756, 0, 9408, 613566756, 0, 9408, 613566756, 0, 9408, 613566756, 0, 9408, 613566756, 0, 9408, 613566756, 0, 9408, 613566756, 0, 9408, 613566756, 0, 9408, 613566756, 0, 9408, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756417882041443745_984_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756417882041443745_984_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4f1496bd --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756417882041443745_984_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,281 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 31)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 11))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 8)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((68 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((75 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 28)) { + if ((WaveGetLaneIndex() >= 18)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter2 == 2)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 22))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 31))) { + if 
(((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 9)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 17))) { + if ((((((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 24))) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 204 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4820, 546, 0, 4820, 546, 0, 4820, 546, 0, 4824, 546, 0, 4824, 546, 0, 4824, 546, 0, 4836, 546, 0, 4836, 546, 0, 4836, 546, 0, 4840, 546, 0, 4840, 546, 0, 4840, 546, 0, 5264, 34, 0, 5264, 34, 0, 5280, 34, 0, 5280, 34, 0, 7552, 1145044992, 0, 7552, 1145044992, 0, 7552, 1145044992, 0, 9216, 4, 0, 9232, 4, 0, 11328, 1140850688, 0, 11328, 1140850688, 0, 11776, 559240, 0, 11776, 559240, 0, 11776, 559240, 0, 11776, 559240, 0, 11776, 559240, 0, 15360, 8, 0, 17920, 545390592, 0, 17920, 545390592, 0, 17920, 545390592, 0, 576, 17, 0, 576, 17, 0, 4820, 546, 0, 4820, 546, 0, 4820, 546, 0, 4824, 546, 0, 4824, 546, 0, 4824, 546, 0, 4836, 546, 0, 4836, 546, 0, 4836, 546, 0, 4840, 546, 0, 4840, 546, 0, 4840, 546, 0, 5264, 34, 0, 5264, 34, 0, 5280, 34, 0, 5280, 34, 0, 7552, 1145044992, 0, 7552, 1145044992, 0, 7552, 1145044992, 0, 9216, 4, 0, 9232, 4, 0, 11328, 1140850688, 0, 11328, 1140850688, 0, 11776, 559240, 0, 11776, 559240, 0, 11776, 559240, 0, 11776, 559240, 0, 11776, 559240, 0, 15360, 8, 0, 17920, 545390592, 0, 17920, 545390592, 0, 17920, 545390592, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756417900828093391_985_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756417900828093391_985_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..55a4be3e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756417900828093391_985_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,77 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 127, 0, 576, 127, 0, 576, 127, 0, 576, 127, 0, 576, 127, 0, 576, 127, 0, 576, 127, 0, 
1024, 7, 0, 1024, 7, 0, 1024, 7, 0, 576, 127, 0, 576, 127, 0, 576, 127, 0, 576, 127, 0, 576, 127, 0, 576, 127, 0, 576, 127, 0, 1024, 7, 0, 1024, 7, 0, 1024, 7, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756418146921006056_990_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756418146921006056_990_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8872bdcf --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756418146921006056_990_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,174 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 31)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 
28))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 25))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 9))) { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
} + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1936, 1073741833, 0, 1936, 1073741833, 0, 1936, 1073741833, 0, 1952, 1073741833, 0, 1952, 1073741833, 0, 1952, 1073741833, 0, 6464, 613566756, 0, 6464, 613566756, 0, 6464, 613566756, 0, 6464, 613566756, 0, 6464, 613566756, 0, 6464, 613566756, 0, 6464, 613566756, 0, 6464, 613566756, 0, 6464, 613566756, 0, 6464, 613566756, 0, 1936, 1073741833, 0, 1936, 1073741833, 0, 1936, 1073741833, 0, 1952, 1073741833, 0, 1952, 1073741833, 0, 1952, 1073741833, 0, 6464, 613566756, 0, 6464, 613566756, 0, 6464, 613566756, 0, 6464, 613566756, 0, 6464, 613566756, 
0, 6464, 613566756, 0, 6464, 613566756, 0, 6464, 613566756, 0, 6464, 613566756, 0, 6464, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756418150603278691_991_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756418150603278691_991_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f8d7c9ad --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756418150603278691_991_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,158 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 26)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 29)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4096, 85, 0, 4096, 85, 0, 4096, 85, 0, 4096, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4096, 85, 0, 4096, 85, 0, 4096, 85, 0, 4096, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756418151417922267_992_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756418151417922267_992_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bc41a0d8 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756418151417922267_992_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,108 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 25))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = 
(result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1632 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 
2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 
3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 
3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1152, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 1168, 4261412895, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2560, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2564, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 
2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2568, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2576, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2580, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 2584, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3136, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3140, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 
3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3144, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3152, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3156, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3160, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3712, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 3728, 2863311530, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 
4544, 4278190095, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4544, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0, 4560, 4278190095, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756418159339304240_993_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756418159339304240_993_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..814df56e --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756418159339304240_993_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,298 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 26))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 27))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 22)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((118 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((127 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 24))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 13)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((186 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 31))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() >= 27)) { + if ((WaveGetLaneIndex() >= 23)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((226 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 714 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2048, 2147483650, 0, 2048, 2147483650, 0, 2624, 2181570690, 0, 2624, 2181570690, 0, 2624, 2181570690, 0, 2624, 2181570690, 0, 2624, 2181570690, 0, 2624, 2181570690, 0, 3072, 272629760, 0, 3072, 272629760, 0, 4288, 272696336, 0, 4288, 272696336, 0, 4288, 272696336, 
0, 4288, 272696336, 0, 4288, 272696336, 0, 5440, 268435456, 0, 7568, 545392672, 0, 7568, 545392672, 0, 7568, 545392672, 0, 7568, 545392672, 0, 7568, 545392672, 0, 7572, 545392672, 0, 7572, 545392672, 0, 7572, 545392672, 0, 7572, 545392672, 0, 7572, 545392672, 0, 7576, 545392672, 0, 7576, 545392672, 0, 7576, 545392672, 0, 7576, 545392672, 0, 7576, 545392672, 0, 7584, 545392672, 0, 7584, 545392672, 0, 7584, 545392672, 0, 7584, 545392672, 0, 7584, 545392672, 0, 7588, 545392672, 0, 7588, 545392672, 0, 7588, 545392672, 0, 7588, 545392672, 0, 7588, 545392672, 0, 7592, 545392672, 0, 7592, 545392672, 0, 7592, 545392672, 0, 7592, 545392672, 0, 7592, 545392672, 0, 8144, 545392672, 0, 8144, 545392672, 0, 8144, 545392672, 0, 8144, 545392672, 0, 8144, 545392672, 0, 8148, 545392672, 0, 8148, 545392672, 0, 8148, 545392672, 0, 8148, 545392672, 0, 8148, 545392672, 0, 8152, 545392672, 0, 8152, 545392672, 0, 8152, 545392672, 0, 8152, 545392672, 0, 8152, 545392672, 0, 8160, 545392672, 0, 8160, 545392672, 0, 8160, 545392672, 0, 8160, 545392672, 0, 8160, 545392672, 0, 8164, 545392672, 0, 8164, 545392672, 0, 8164, 545392672, 0, 8164, 545392672, 0, 8164, 545392672, 0, 8168, 545392672, 0, 8168, 545392672, 0, 8168, 545392672, 0, 8168, 545392672, 0, 8168, 545392672, 0, 10256, 1224736841, 0, 10256, 1224736841, 0, 10256, 1224736841, 0, 10256, 1224736841, 0, 10256, 1224736841, 0, 10256, 1224736841, 0, 10272, 1224736841, 0, 10272, 1224736841, 0, 10272, 1224736841, 0, 10272, 1224736841, 0, 10272, 1224736841, 0, 10272, 1224736841, 0, 10288, 1224736841, 0, 10288, 1224736841, 0, 10288, 1224736841, 0, 10288, 1224736841, 0, 10288, 1224736841, 0, 10288, 1224736841, 0, 12752, 1, 0, 12768, 1, 0, 12784, 1, 0, 13312, 272696336, 0, 13312, 272696336, 0, 13312, 272696336, 0, 13312, 272696336, 0, 13312, 272696336, 0, 14464, 536870912, 0, 14480, 536870912, 0, 15680, 2048, 0, 15696, 2048, 0, 16256, 545392672, 0, 16256, 545392672, 0, 16256, 545392672, 0, 16256, 545392672, 0, 16256, 545392672, 0, 16272, 545392672, 
0, 16272, 545392672, 0, 16272, 545392672, 0, 16272, 545392672, 0, 16272, 545392672, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 2048, 2147483650, 0, 2048, 2147483650, 0, 2624, 2181570690, 0, 2624, 2181570690, 0, 2624, 2181570690, 0, 2624, 2181570690, 0, 2624, 2181570690, 0, 2624, 2181570690, 0, 3072, 272629760, 0, 3072, 272629760, 0, 4288, 272696336, 0, 4288, 272696336, 0, 4288, 272696336, 0, 4288, 272696336, 0, 4288, 272696336, 0, 5440, 268435456, 0, 7568, 545392672, 0, 7568, 545392672, 0, 7568, 545392672, 0, 7568, 545392672, 0, 7568, 545392672, 0, 7572, 545392672, 0, 7572, 545392672, 0, 7572, 545392672, 0, 7572, 545392672, 0, 7572, 545392672, 0, 7576, 545392672, 0, 7576, 545392672, 0, 7576, 545392672, 0, 7576, 545392672, 0, 7576, 545392672, 0, 7584, 545392672, 0, 7584, 545392672, 0, 7584, 545392672, 0, 7584, 545392672, 0, 7584, 545392672, 0, 7588, 545392672, 0, 7588, 545392672, 0, 7588, 545392672, 0, 7588, 545392672, 0, 7588, 545392672, 0, 7592, 545392672, 0, 7592, 545392672, 0, 7592, 545392672, 0, 7592, 545392672, 0, 7592, 545392672, 0, 8144, 545392672, 0, 8144, 545392672, 0, 8144, 545392672, 0, 8144, 545392672, 0, 8144, 545392672, 0, 8148, 545392672, 0, 8148, 545392672, 0, 8148, 545392672, 0, 8148, 545392672, 0, 8148, 545392672, 0, 8152, 545392672, 0, 8152, 545392672, 0, 8152, 545392672, 0, 8152, 545392672, 0, 8152, 545392672, 0, 8160, 545392672, 0, 8160, 545392672, 0, 8160, 545392672, 0, 8160, 545392672, 0, 8160, 545392672, 0, 8164, 545392672, 0, 8164, 545392672, 0, 8164, 545392672, 0, 8164, 545392672, 0, 8164, 545392672, 0, 8168, 545392672, 0, 8168, 545392672, 0, 8168, 545392672, 0, 8168, 545392672, 0, 8168, 545392672, 0, 10256, 1224736841, 0, 10256, 1224736841, 0, 10256, 1224736841, 0, 10256, 1224736841, 0, 10256, 1224736841, 0, 10256, 1224736841, 0, 10272, 1224736841, 0, 10272, 1224736841, 0, 10272, 1224736841, 0, 10272, 1224736841, 0, 10272, 1224736841, 0, 10272, 1224736841, 0, 10288, 1224736841, 0, 10288, 1224736841, 0, 10288, 1224736841, 0, 10288, 1224736841, 
0, 10288, 1224736841, 0, 10288, 1224736841, 0, 12752, 1, 0, 12768, 1, 0, 12784, 1, 0, 13312, 272696336, 0, 13312, 272696336, 0, 13312, 272696336, 0, 13312, 272696336, 0, 13312, 272696336, 0, 14464, 536870912, 0, 14480, 536870912, 0, 15680, 2048, 0, 15696, 2048, 0, 16256, 545392672, 0, 16256, 545392672, 0, 16256, 545392672, 0, 16256, 545392672, 0, 16256, 545392672, 0, 16272, 545392672, 0, 16272, 545392672, 0, 16272, 545392672, 0, 16272, 545392672, 0, 16272, 545392672, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756418329838596943_995_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756418329838596943_995_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..50e9225c --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756418329838596943_995_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,303 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 14)) || 
(WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 27)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 15)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 18))) { + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 24))) { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((198 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 12)) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((217 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 29))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((237 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((247 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((268 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((277 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((284 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4112, 1207959552, 0, 4112, 1207959552, 0, 4128, 1207959552, 0, 4128, 1207959552, 0, 4672, 272696336, 0, 4672, 272696336, 0, 4672, 272696336, 0, 4672, 272696336, 0, 4672, 272696336, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 5632, 17, 0, 5632, 17, 0, 6208, 286331153, 0, 6208, 286331153, 0, 6208, 286331153, 0, 6208, 286331153, 0, 6208, 286331153, 0, 6208, 286331153, 0, 6208, 286331153, 0, 6208, 286331153, 0, 10752, 8, 0, 10768, 8, 0, 14592, 2147483648, 0, 14608, 2147483648, 0, 17152, 559232, 0, 17152, 559232, 0, 17152, 559232, 0, 17152, 559232, 0, 17168, 559232, 0, 17168, 559232, 0, 17168, 559232, 0, 17168, 559232, 0, 18176, 8, 0, 18192, 8, 0, 4112, 1207959552, 0, 4112, 1207959552, 0, 4128, 1207959552, 0, 4128, 1207959552, 0, 4672, 272696336, 0, 4672, 272696336, 0, 4672, 272696336, 0, 4672, 272696336, 0, 4672, 272696336, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 4992, 613566756, 0, 5632, 17, 0, 5632, 17, 0, 6208, 286331153, 0, 6208, 286331153, 0, 6208, 286331153, 0, 6208, 286331153, 0, 6208, 286331153, 0, 6208, 286331153, 0, 6208, 286331153, 0, 6208, 286331153, 0, 10752, 8, 0, 10768, 8, 0, 14592, 2147483648, 0, 14608, 2147483648, 0, 17152, 559232, 0, 17152, 559232, 0, 17152, 559232, 0, 17152, 559232, 0, 17168, 559232, 0, 17168, 559232, 0, 17168, 559232, 0, 17168, 559232, 0, 18176, 8, 0, 18192, 8, 0] + - Name: _wave_op_index + 
Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756418363971322648_996_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756418363971322648_996_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ca5aaef7 --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756418363971322648_996_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,268 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 25))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 28))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1472, 272696336, 0, 1472, 272696336, 0, 1472, 272696336, 0, 1472, 272696336, 0, 1472, 272696336, 0, 2688, 272696336, 0, 2688, 272696336, 0, 2688, 272696336, 0, 2688, 272696336, 0, 2688, 272696336, 0, 4160, 613566756, 0, 4160, 613566756, 0, 4160, 613566756, 0, 4160, 613566756, 0, 4160, 613566756, 0, 4160, 613566756, 0, 4160, 613566756, 0, 4160, 613566756, 0, 4160, 613566756, 0, 4160, 613566756, 0, 6912, 585, 0, 6912, 585, 0, 6912, 585, 0, 6912, 585, 0, 10944, 613566756, 0, 10944, 613566756, 0, 10944, 613566756, 0, 10944, 613566756, 0, 10944, 
613566756, 0, 10944, 613566756, 0, 10944, 613566756, 0, 10944, 613566756, 0, 10944, 613566756, 0, 10944, 613566756, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1472, 272696336, 0, 1472, 272696336, 0, 1472, 272696336, 0, 1472, 272696336, 0, 1472, 272696336, 0, 2688, 272696336, 0, 2688, 272696336, 0, 2688, 272696336, 0, 2688, 272696336, 0, 2688, 272696336, 0, 4160, 613566756, 0, 4160, 613566756, 0, 4160, 613566756, 0, 4160, 613566756, 0, 4160, 613566756, 0, 4160, 613566756, 0, 4160, 613566756, 0, 4160, 613566756, 0, 4160, 613566756, 0, 4160, 613566756, 0, 6912, 585, 0, 6912, 585, 0, 6912, 585, 0, 6912, 585, 0, 10944, 613566756, 0, 10944, 613566756, 0, 10944, 613566756, 0, 10944, 613566756, 0, 10944, 613566756, 0, 10944, 613566756, 0, 10944, 613566756, 0, 10944, 613566756, 0, 10944, 613566756, 0, 10944, 613566756, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize32BitTracking/tests/program_1756418366816506515_997_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756418366816506515_997_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e19158ec --- /dev/null +++ b/test/WaveSize32BitTracking/tests/program_1756418366816506515_997_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,313 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 19))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 11)) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 24)) { + if ((WaveGetLaneIndex() >= 19)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 14)) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 25))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for 
(uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 30))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 27))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 23))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 10)) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 31))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((291 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + 
break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((311 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (321 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (328 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (332 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 312 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3408, 268435456, 0, 3424, 268435456, 0, 3440, 268435456, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 
1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 9600, 32, 0, 9616, 32, 0, 9632, 32, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 12672, 2818572290, 0, 12672, 2818572290, 0, 12672, 2818572290, 0, 12672, 2818572290, 0, 15808, 17, 0, 15808, 17, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 18624, 32768, 0, 18640, 32768, 0, 18656, 32768, 0, 3408, 268435456, 0, 3424, 268435456, 0, 3440, 268435456, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 6592, 1163220309, 0, 9600, 32, 0, 9616, 32, 0, 9632, 32, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 10496, 2862787114, 0, 12672, 2818572290, 0, 12672, 2818572290, 0, 12672, 2818572290, 0, 12672, 2818572290, 0, 15808, 17, 0, 15808, 17, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 16704, 1145324612, 0, 18624, 32768, 0, 18640, 32768, 0, 18656, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + 
GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize32BitTracking/tests/program_1756418375430675608_998_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756418375430675608_998_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..76b850d6
--- /dev/null
+++ b/test/WaveSize32BitTracking/tests/program_1756418375430675608_998_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,109 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-word records: id word, ballot.x, ballot.y
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the next free word in _participant_bit
+
+[numthreads(64, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // no lane satisfies any guard chain below, so no record is ever appended
+  uint result = 0;
+  for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) {
+    if ((WaveGetLaneIndex() == 9)) { // only lane 9 continues
+      switch ((WaveGetLaneIndex() % 3)) { // 9 % 3 == 0, so lane 9 selects case 0
+        case 0: {
+          if ((WaveGetLaneIndex() < 8)) { // false for lane 9
+            result = (result + WaveActiveSum(1));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp); // reserves three words; temp receives the previous counter value
+            _participant_bit[temp] = ((20 << 6) | (i0 << 4)); // word 0: constant id above bit 6, loop counter from bit 4
+            uint4 ballot = WaveActiveBallot(1); // argument is constant 1, so the mask has a bit per active lane
+            _participant_bit[(temp + 1)] = ballot.x; // word 1: mask bits for lanes 0-31
+            _participant_bit[(temp + 2)] = ballot.y; // word 2: mask bits for lanes 32-63
+          }
+          break;
+        }
+        case 1: { // not selected by lane 9
+          if (((WaveGetLaneIndex() % 2) == 0)) {
+            result = (result + WaveActiveSum(2));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((29 << 6) | (i0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 2: { // not selected by lane 9
+          if (true) {
+            result = (result + WaveActiveSum(3));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((34 << 6) | (i0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+      }
+      if ((WaveGetLaneIndex() == 3)) { // nested under the lane == 9 guard, so never true
+        result = (result + WaveActiveSum((WaveGetLaneIndex() + 5)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((43 << 6) | (i0 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 64 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 3 # NOTE(review): was 1024; sized like expected_bit_patterns because the comparison rejects buffers of different sizes and the shader appends nothing
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 3
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize32BitTracking/tests/program_1756418375733375813_999_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756418375733375813_999_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..c57bc58a
--- /dev/null
+++ b/test/WaveSize32BitTracking/tests/program_1756418375733375813_999_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,227 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-word records: id word, ballot.x, ballot.y
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the next free word in _participant_bit
+
+[numthreads(64, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // each lane that executes a guarded wave op appends one record naming the op and its ballot
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) { // even lanes 0, 2, 4, 6
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp); // reserves three words; temp receives the previous counter value
+        _participant_bit[temp] = (9 << 6); // word 0: constant id above bit 6
+        uint4 ballot = WaveActiveBallot(1); // argument is constant 1, so the mask has a bit per active lane
+        _participant_bit[(temp + 1)] = ballot.x; // word 1: mask bits for lanes 0-31
+        _participant_bit[(temp + 2)] = ballot.y; // word 2: mask bits for lanes 32-63
+      }
+    } // no break here: even lanes continue into case 1 (the expected data lists id 29 for even lanes)
+    case 1: {
+      if (((WaveGetLaneIndex() & 1) == 0)) { // even lanes, arriving through case 0
+        for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) {
+          if ((WaveGetLaneIndex() >= 23)) { // even lanes from 24 up
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((29 << 6) | (i0 << 4)); // loop counter i0 is packed from bit 4
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if (((WaveGetLaneIndex() & 1) == 1)) { // contradicts the enclosing even-lane guard; never true
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (38 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      } else { // odd lanes
+        if ((WaveGetLaneIndex() >= 30)) { // odd lanes from 31 up
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (45 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        switch ((WaveGetLaneIndex() % 2)) { // only odd lanes get here, so they select case 1
+          case 0: { // not selected by odd lanes
+            if ((WaveGetLaneIndex() < 8)) {
+              result = (result + WaveActiveSum(1));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (55 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+          case 1: {
+            if (((WaveGetLaneIndex() % 2) == 0)) { // false for the odd lanes that select this case
+              result = (result + WaveActiveSum(2));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (64 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+          default: { // never selected: lane % 2 is 0 or 1
+            result = (result + WaveActiveSum(99));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (68 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+            break;
+          }
+        }
+      }
+      break;
+    }
+  }
+  switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) { // even lanes 0, 2, 4, 6
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (78 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: { // odd lanes
+      for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) {
+        uint counter2 = 0;
+        while ((counter2 < 2)) {
+          counter2 = (counter2 + 1);
+          switch ((WaveGetLaneIndex() % 4)) { // odd lanes give remainder 1 or 3
+            case 0: { // not selected by odd lanes
+              if ((WaveGetLaneIndex() < 8)) {
+                result = (result + WaveActiveSum(1));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (((103 << 6) | (i1 << 4)) | (counter2 << 2)); // i1 from bit 4, counter2 from bit 2
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+            case 1: {
+              if (((WaveGetLaneIndex() % 2) == 0)) { // false for the odd lanes that select this case
+                result = (result + WaveActiveSum(2));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (((112 << 6) | (i1 << 4)) | (counter2 << 2));
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+            case 2: { // not selected by odd lanes
+              if (true) {
+                result = (result + WaveActiveSum(3));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (((117 << 6) | (i1 << 4)) | (counter2 << 2));
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+            case 3: {
+              if ((WaveGetLaneIndex() < 20)) { // lanes 3, 7, 11, 15, 19
+                result = (result + WaveActiveSum(4));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (((124 << 6) | (i1 << 4)) | (counter2 << 2));
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+            default: { // never selected: lane % 4 is 0 through 3
+              result = (result + WaveActiveSum(99));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (((128 << 6) | (i1 << 4)) | (counter2 << 2));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+              break;
+            }
+          }
+          if ((counter2 == 1)) { // leaves the while loop after its first pass, so recorded counter2 is always 1
+            break;
+          }
+        }
+      }
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 64 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 162
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1856, 1426063360, 0, 1856, 1426063360, 0, 1856, 1426063360, 0, 1856, 1426063360, 0, 1872, 1426063360, 0, 1872, 1426063360, 0, 1872, 1426063360, 0, 1872, 1426063360, 0, 2880, 2147483648, 0, 4992, 85, 0, 4992, 85, 0, 4992, 85, 0, 4992, 85, 0, 7940, 559240, 0, 7940, 559240, 0, 7940, 559240, 0, 7940, 559240, 0, 7940, 559240, 0, 7956, 559240, 0, 7956, 559240, 0, 7956, 559240, 0, 7956, 559240, 0, 7956, 559240, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1856, 1426063360, 0, 1856, 1426063360, 0, 1856, 1426063360, 0, 1856, 1426063360, 0, 1872, 1426063360, 0, 1872, 1426063360, 0, 1872, 1426063360, 0, 1872, 1426063360, 0, 2880, 2147483648, 0, 4992, 85, 0, 4992, 85, 0, 4992, 85, 0, 4992, 85, 0, 7940, 559240, 0, 7940, 559240, 0, 7940, 559240, 0, 7940, 559240, 0, 7940, 559240, 0, 7956, 559240, 0, 7956, 559240, 0, 7956, 559240, 0, 7956, 559240, 0, 7956, 559240, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize32BitTracking/tests/program_1756418383397412354_1000_increment_0_WaveParticipantBitTracking.test b/test/WaveSize32BitTracking/tests/program_1756418383397412354_1000_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..c14cebfc
--- /dev/null
+++ b/test/WaveSize32BitTracking/tests/program_1756418383397412354_1000_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,121 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-word records: id word, ballot.x, ballot.y
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the next free word in _participant_bit
+
+[numthreads(64, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // each lane that executes a guarded wave op appends one record naming the op and its ballot
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 3)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) { // lanes 0, 3, 6
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp); // reserves three words; temp receives the previous counter value
+        _participant_bit[temp] = (9 << 6); // word 0: constant id above bit 6
+        uint4 ballot = WaveActiveBallot(1); // argument is constant 1, so the mask has a bit per active lane
+        _participant_bit[(temp + 1)] = ballot.x; // word 1: mask bits for lanes 0-31
+        _participant_bit[(temp + 2)] = ballot.y; // word 2: mask bits for lanes 32-63
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) { // even lanes with remainder 1: 4, 10, 16, 22, 28, ...
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (18 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 2: {
+      if (true) { // every lane with remainder 2 records
+        result = (result + WaveActiveSum(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (23 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+  }
+  switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) { // even lanes 0, 2, 4, 6
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (33 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) { // false for the odd lanes that select this case; never recorded
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (42 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 64 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 132
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 2112, 85, 0, 2112, 85, 0, 2112, 85, 0, 2112, 85, 0, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1152, 272696336, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 1472, 613566756, 0, 2112, 85, 0, 2112, 85, 0, 2112, 85, 0, 2112, 85, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+
DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267509324389201_1_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267509324389201_1_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..50cfe6d2
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267509324389201_1_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,240 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-word records: id word, ballot.x, ballot.y
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the next free word in _participant_bit
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // each lane that executes a guarded wave op appends one record naming the op and its ballot
+  uint result = 0;
+  uint counter0 = 0;
+  while ((counter0 < 3)) { // counter0 is incremented first, so recorded values are 1, 2, 3
+    counter0 = (counter0 + 1);
+    uint counter1 = 0;
+    while ((counter1 < 2)) { // recorded counter1 values are 1, 2
+      counter1 = (counter1 + 1);
+      if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) {
+        result = (result + WaveActiveMax(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp); // reserves three words; temp receives the previous counter value
+        _participant_bit[temp] = (((24 << 6) | (counter0 << 4)) | (counter1 << 2)); // word 0: id above bit 6, counter0 from bit 4, counter1 from bit 2
+        uint4 ballot = WaveActiveBallot(1); // argument is constant 1, so the mask has a bit per active lane
+        _participant_bit[(temp + 1)] = ballot.x; // word 1: mask bits for lanes 0-31
+        _participant_bit[(temp + 2)] = ballot.y; // word 2: mask bits for lanes 32-63
+      }
+      if ((WaveGetLaneIndex() >= 3)) { // the same guard is repeated three levels deep
+        if ((WaveGetLaneIndex() >= 3)) {
+          if ((WaveGetLaneIndex() >= 3)) {
+            result = (result + WaveActiveMin((WaveGetLaneIndex() + 3)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((39 << 6) | (counter0 << 4)) | (counter1 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+      }
+    }
+  }
+  if ((WaveGetLaneIndex() == 1)) { // only lane 1 continues
+    if ((WaveGetLaneIndex() == 3)) { // contradicts the enclosing lane == 1 guard; never true
+      result = (result + WaveActiveSum(3));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (49 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    switch ((WaveGetLaneIndex() % 3)) { // 1 % 3 == 1, so lane 1 selects case 1
+      case 0: { // not selected by lane 1
+        if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) {
+          if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) {
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (78 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) {
+            result = (result + WaveActiveMax((WaveGetLaneIndex() + 1)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (95 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+      } // no break between case 0 and case 1
+      case 1: {
+        if (((WaveGetLaneIndex() % 2) == 0)) { // false for lane 1
+          result = (result + WaveActiveSum(2));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (104 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 2: { // not selected by lane 1
+        uint counter2 = 0;
+        while ((counter2 < 3)) {
+          counter2 = (counter2 + 1);
+          if (((WaveGetLaneIndex() & 1) == 0)) {
+            result = (result + WaveActiveSum(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((120 << 6) | (counter2 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if (((WaveGetLaneIndex() & 1) == 0)) {
+            result = (result + WaveActiveMin((WaveGetLaneIndex() + 3)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((131 << 6) | (counter2 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        break;
+      }
+    }
+    if ((WaveGetLaneIndex() == 3)) { // contradicts the enclosing lane == 1 guard; never true
+      result = (result + WaveActiveMin(WaveGetLaneIndex()));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (138 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+  for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) {
+    if (((WaveGetLaneIndex() & 1) == 1)) { // odd lanes
+      if (((WaveGetLaneIndex() & 1) == 0)) { // contradicts the enclosing odd-lane guard; never true
+        result = (result + WaveActiveMax(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((160 << 6) | (i3 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      uint counter4 = 0;
+      while ((counter4 < 3)) { // recorded counter4 values are 1, 2, 3
+        counter4 = (counter4 + 1);
+        if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { // even lanes only, so false inside the odd-lane branch
+          result = (result + WaveActiveMin((WaveGetLaneIndex() + 2)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((184 << 6) | (i3 << 4)) | (counter4 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        uint counter5 = 0;
+        while ((counter5 < 3)) {
+          counter5 = (counter5 + 1);
+          if ((WaveGetLaneIndex() == 2)) { // lane 2 is even, so false inside the odd-lane branch
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((((198 << 6) | (i3 << 4)) | (counter4 << 2)) | counter5); // counter5 occupies the lowest bits
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((WaveGetLaneIndex() == 1)) {
+            result = (result + WaveActiveSum(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((((205 << 6) | (i3 << 4)) | (counter4 << 2)) | counter5);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((counter5 == 2)) { // leaves the inner while after its second pass, so recorded counter5 is 1 or 2
+            break;
+          }
+        }
+      }
+    } else { // even lanes
+      if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { // lane 3 is odd, so only lane 0 matches here
+        result = (result + WaveActiveMin(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((219 << 6) | (i3 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if ((WaveGetLaneIndex() >= 3)) { // lanes 0 and 2 fail this; the expected data has no id 229 records
+        if ((WaveGetLaneIndex() >= 3)) {
+          result = (result + WaveActiveSum(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((229 << 6) | (i3 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 96
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1556, 5, 0, 1556, 5, 0, 1560, 5, 0, 1560, 5, 0, 1572, 5, 0, 1572, 5, 0, 1576, 5, 0, 1576, 5, 0, 1588, 5, 0, 1588, 5, 0, 1592, 5, 0, 1592, 5, 0, 2516, 8, 0, 2520, 8, 0, 2532, 8, 0, 2536, 8, 0, 2548, 8, 0, 2552, 8, 0, 13125, 2, 0, 13126, 2, 0, 13129, 2, 0, 13130, 2, 0, 13133, 2, 0, 13134, 2, 0, 13141, 2, 0, 13142, 2, 0, 13145, 2, 0, 13146, 2, 0, 13149, 2, 0, 13150, 2, 0, 14016, 1, 0, 14032, 1, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267509867902936_2_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267509867902936_2_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c2e65a2a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267509867902936_2_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,473 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((235 << 6) | (i1 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((269 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((289 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() 
== 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (326 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (339 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (349 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (356 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 
2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (374 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (385 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((401 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((417 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((426 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + 
WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (430 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (445 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 63 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 5, 0, 1088, 5, 0, 1728, 1, 0, 3648, 4, 0, 4800, 4, 0, 7040, 2, 0, 9152, 8, 0, 9792, 5, 0, 9792, 5, 0, 15680, 8, 0, 16592, 8, 0, 16608, 8, 0, 18512, 8, 0, 18528, 8, 0, 19216, 2, 0, 19232, 2, 0, 21696, 1, 0, 27280, 8, 0, 27296, 8, 0, 27312, 8, 0, 28480, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267509999551500_3_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267509999551500_3_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4660ccc3 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267509999551500_3_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,211 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2496, 4, 0, 2512, 4, 0, 2528, 4, 0, 3200, 1, 0, 3216, 1, 0, 3232, 1, 0, 4224, 8, 0, 5760, 9, 0, 5760, 9, 0, 7552, 2, 0, 7568, 2, 0, 9664, 2, 0, 9680, 2, 0, 9984, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern 
+ GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267510103017546_4_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267510103017546_4_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..18ccc78b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267510103017546_4_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,292 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() 
== 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((265 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((275 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((282 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [13120, 8, 0, 13136, 8, 0, 13152, 8, 0, 15488, 10, 0, 15488, 10, 0, 16960, 9, 0, 16960, 9, 0, 16976, 9, 0, 16976, 9, 0, 19200, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267510185971001_5_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267510185971001_5_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..98454cd1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267510185971001_5_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,94 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + 
Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 6, 0, 1472, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267510236973302_6_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267510236973302_6_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..be002cdf --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267510236973302_6_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,209 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 10, 0, 832, 10, 0, 1472, 1, 0, 2368, 4, 0, 4160, 4, 0, 5376, 4, 0, 5392, 4, 0, 8000, 1, 0, 9792, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267510438724721_8_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267510438724721_8_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..70c63ef2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267510438724721_8_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,446 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + if ((i0 == 1)) { + 
continue; + } + if ((i0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((190 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((203 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + } + break; + } + case 3: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((237 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((260 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((274 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i6 == 2)) { + break; + } + } + if ((i5 == 1)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((332 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((339 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (348 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) 
== 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (357 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (378 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (387 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (399 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((420 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + for (uint i9 = 0; (i9 < 2); i9 = (i9 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((439 << 6) | (counter8 << 4)) | (i9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((450 << 6) | (counter8 << 4)) | (i9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i9 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((460 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4992, 1, 0, 6032, 1, 0, 7744, 1, 0, 9984, 4, 0, 15168, 8, 0, 15184, 8, 0, 16640, 8, 0, 16644, 8, 0, 16648, 8, 0, 16656, 8, 0, 16660, 8, 0, 16664, 8, 0, 19584, 1, 0, 21696, 8, 0, 21712, 8, 0, 22272, 8, 0, 28112, 8, 0, 28116, 8, 0, 28128, 8, 0, 28132, 8, 0, 29456, 2, 0, 29472, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267510734356171_9_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267510734356171_9_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cbaa5bc3 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267510734356171_9_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,446 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((103 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((121 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() 
>= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (327 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((352 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((369 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (374 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (383 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (387 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6592, 4, 0, 6608, 4, 0, 7748, 4, 0, 7752, 4, 0, 7764, 4, 0, 7768, 4, 0, 9536, 8, 0, 13440, 5, 0, 13440, 5, 0, 12544, 8, 0, 14976, 1, 0, 15616, 1, 0, 16320, 1, 0, 23936, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267510848019785_10_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267510848019785_10_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7045152e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267510848019785_10_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,144 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); /* record log: every tracked site stores three words here (tag, ballot.x, ballot.y) */ +RWStructuredBuffer _wave_op_index : register(u1); /* element 0 is the write cursor into _participant_bit; pipeline.yaml initialises it to 0 */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* control flow diverges purely on WaveGetLaneIndex(); result is accumulated but never written to a buffer */ + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* advance the cursor by three words; temp is where this lane writes its record */ + _participant_bit[temp] = (9 << 6); /* tag 576, the value carried by the first two expected records */ + uint4 ballot = WaveActiveBallot(1); /* only the first two words of the ballot are stored */ + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { /* a lane here has index % 3 == 1; with the four threads as lanes 0-3 (assumed) that is lane 1, which is odd, and expected_bit_patterns holds no tag 1152 (18 << 6) record */ + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { /* odd lanes; tag 5376 appears twice in expected_bit_patterns with mask 10 */ + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { /* only even lanes reach this arm */ + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { /* cannot hold for an even lane; expected_bit_patterns has no tag 5120 (80 << 6) record */ + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { /* every lane that gets this deep has already failed the tests for 1, 2 and 3, so this cannot hold */ + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 4, 0, 5376, 10, 0, 5376, 10, 0, 4864, 4, 0, 4480, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + 
- Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267510894646104_11_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267510894646104_11_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b2584be5 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267510894646104_11_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,194 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); /* record log: three words per tracked site (tag, ballot.x, ballot.y) */ +RWStructuredBuffer _wave_op_index : register(u1); /* element 0 is the write cursor into _participant_bit; pipeline.yaml initialises it to 0 */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* each lane index modulo 4 selects its own case; result is accumulated but never written to a buffer */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* advance the cursor by three words; temp is where this lane writes its record */ + _participant_bit[temp] = (9 << 6); /* tag 576, the first expected record */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { /* a lane here has index % 4 == 1, so the == 3 test below cannot pass; tags 1216, 1856, 2496 and 3072 are all absent from expected_bit_patterns */ + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); /* tag 3392, the second expected record */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { /* a lane here has index % 4 == 3, hence index >= 3: both tests below pass and the else arm is never entered */ + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); /* tag 4544, the third expected record */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { /* index % 2 is 0 or 1 for an unsigned lane index, so default is never selected */ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 3392, 4, 0, 4544, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267510941014421_12_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267510941014421_12_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..abc8e141 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267510941014421_12_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267510983041121_13_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267510983041121_13_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5e3f718e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267510983041121_13_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i0 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 2, 0, 960, 9, 0, 960, 9, 0, 4164, 4, 0, 4168, 4, 0, 4180, 4, 0, 4184, 4, 0, 4196, 4, 0, 4200, 4, 0, 4612, 8, 0, 4616, 8, 0, 4628, 8, 0, 4632, 8, 0, 4644, 8, 0, 4648, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267511130305315_14_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267511130305315_14_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..139f69d1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267511130305315_14_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,242 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((87 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((96 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } + break; + 
} + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) 
|| (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 1, 0, 1104, 1, 0, 1120, 1, 0, 2752, 5, 0, 2752, 5, 0, 2768, 5, 0, 2768, 5, 0, 2784, 5, 0, 2784, 5, 0, 3712, 4, 0, 3728, 4, 0, 3744, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267511205634905_15_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267511205634905_15_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d2d1c1d6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267511205634905_15_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,173 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((99 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((110 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + 
if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 1, 0, 1104, 1, 0, 5056, 8, 0, 5072, 8, 0, 7044, 10, 0, 7044, 10, 0, 7048, 10, 0, 7048, 10, 0, 7052, 10, 0, 7052, 10, 0, 7060, 10, 0, 7060, 10, 0, 7064, 10, 0, 7064, 10, 0, 7068, 10, 0, 7068, 10, 0, 10176, 4, 0, 10192, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267511337627047_16_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267511337627047_16_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5b56166e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267511337627047_16_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,250 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((166 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((176 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((191 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1216, 1, 0, 4224, 1, 0, 5120, 2, 0, 8960, 14, 0, 8960, 14, 0, 8960, 14, 0, 8976, 14, 0, 8976, 14, 0, 8976, 14, 0, 10624, 4, 0, 10628, 4, 0, 10632, 4, 0, 10640, 4, 0, 10644, 4, 0, 10648, 4, 0, 12224, 10, 0, 12224, 10, 0, 12240, 10, 0, 12240, 10, 0, 13184, 10, 0, 13184, 10, 0, 13200, 10, 0, 13200, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267511554279507_18_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267511554279507_18_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..55ecaa81 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267511554279507_18_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,232 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((34 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = 
(result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((67 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || 
(WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((140 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 153 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 1, 0, 1056, 1, 0, 1072, 1, 0, 2192, 8, 0, 2196, 8, 0, 2200, 8, 0, 2208, 8, 0, 2212, 8, 0, 2216, 8, 0, 2224, 8, 0, 2228, 8, 0, 2232, 8, 0, 2768, 8, 0, 2772, 8, 0, 2776, 8, 0, 2784, 8, 0, 2788, 8, 0, 2792, 8, 0, 2800, 8, 0, 2804, 8, 0, 2808, 8, 0, 3472, 1, 0, 3476, 1, 0, 3480, 1, 0, 3488, 1, 0, 3492, 1, 0, 3496, 1, 0, 3504, 1, 0, 3508, 1, 0, 3512, 1, 0, 4304, 1, 0, 4308, 1, 0, 4312, 1, 0, 4320, 1, 0, 4324, 1, 0, 4328, 1, 0, 4336, 1, 0, 4340, 1, 0, 4344, 1, 0, 4752, 8, 0, 4756, 8, 0, 4760, 8, 0, 4768, 8, 0, 4772, 8, 0, 4776, 8, 0, 4784, 8, 0, 4788, 8, 0, 4792, 8, 0, 10048, 4, 0, 10944, 5, 0, 10944, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267511862917037_19_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267511862917037_19_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9d434a2e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267511862917037_19_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,81 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch 
for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 10, 0, 1600, 10, 0, 2512, 8, 0, 2528, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267511908652060_20_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267511908652060_20_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d9a16633 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267511908652060_20_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,131 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((98 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + if ((i1 == 1)) { + break; + } + } + if ((i0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 1, 0, 2752, 1, 0, 4800, 13, 0, 4800, 13, 0, 4800, 13, 0, 4816, 13, 0, 4816, 13, 0, 4816, 13, 0, 6272, 10, 0, 6272, 10, 0, 6276, 10, 0, 6276, 10, 0, 6288, 10, 0, 6288, 10, 0, 6292, 10, 0, 6292, 10, 0, 7232, 14, 0, 7232, 14, 0, 7232, 14, 0, 7236, 14, 0, 7236, 14, 0, 7236, 14, 0, 7248, 14, 0, 7248, 14, 0, 7248, 14, 0, 7252, 14, 0, 7252, 14, 0, 7252, 14, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267512003311884_21_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267512003311884_21_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..de46a68e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267512003311884_21_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,123 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 
6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 63 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 5, 0, 1296, 5, 0, 1312, 5, 0, 1312, 5, 0, 1328, 5, 0, 1328, 5, 0, 2580, 5, 0, 2580, 5, 0, 2596, 5, 0, 2596, 5, 0, 2612, 5, 0, 2612, 5, 0, 3860, 5, 0, 3860, 5, 0, 3876, 5, 0, 3876, 5, 0, 3892, 5, 0, 3892, 5, 0, 4564, 1, 0, 4580, 1, 0, 4596, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267512140837720_23_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267512140837720_23_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f8e66407 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267512140837720_23_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 1, 0, 912, 1, 0, 3264, 4, 0, 3280, 4, 0, 3712, 4, 0, 3728, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267512195301699_24_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267512195301699_24_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6c731a6f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267512195301699_24_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,82 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1152, 5, 0, 1152, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267512238573405_25_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267512238573405_25_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ba23a172 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267512238573405_25_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,124 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + 
result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 2, 0, 4544, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267512282995063_26_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267512282995063_26_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5ca5d39b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267512282995063_26_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,122 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2688, 13, 0, 2688, 13, 0, 2688, 13, 0, 3328, 5, 0, 3328, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267512327052767_27_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267512327052767_27_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7bf9a78d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267512327052767_27_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,171 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 9, 0, 768, 9, 0, 7552, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267512374475441_28_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267512374475441_28_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..911d8d7d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267512374475441_28_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,270 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + 
result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 7616, 8, 0, 8768, 5, 0, 8768, 5, 0, 12608, 5, 0, 12608, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267512437037956_29_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267512437037956_29_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f902377e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267512437037956_29_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,444 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 
0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((122 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((133 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((158 << 6) | (i0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 
1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (303 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (328 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (337 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (350 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (376 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((399 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (403 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (413 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (422 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (427 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (434 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 87 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 10116, 4, 0, 10120, 4, 0, 10124, 4, 0, 10132, 4, 0, 10136, 4, 0, 10140, 4, 0, 10148, 4, 0, 10152, 4, 0, 10156, 4, 0, 11200, 8, 0, 13056, 8, 0, 16064, 1, 0, 17216, 1, 0, 17920, 1, 0, 18496, 1, 0, 19392, 10, 0, 19392, 10, 0, 20096, 1, 0, 22400, 1, 0, 24064, 10, 0, 24064, 10, 0, 25536, 
10, 0, 25536, 10, 0, 25552, 10, 0, 25552, 10, 0, 26432, 1, 0, 27328, 4, 0, 27776, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267513126146939_34_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267513126146939_34_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..70f19862 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267513126146939_34_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,315 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((84 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + 
} + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((WaveGetLaneIndex() < 2)) { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((212 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((248 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 1)) { + continue; + } + if ((i6 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 10, 0, 976, 10, 0, 992, 10, 0, 992, 10, 0, 1552, 5, 0, 1552, 5, 0, 1568, 5, 0, 1568, 5, 0, 2368, 1, 0, 3264, 4, 0, 6208, 1, 0, 10496, 8, 0, 10944, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267513291064842_36_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267513291064842_36_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b94748e2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267513291064842_36_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,352 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-uint records: op tag, ballot.x, ballot.y +RWStructuredBuffer<uint> _wave_op_index : register(u1); // [0] = next free index into _participant_bit; each record reserves 3 slots + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + 
for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((101 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 4096, 4, 0, 6464, 1, 0, 6468, 1, 0, 6480, 1, 0, 6484, 1, 0, 6496, 1, 0, 6500, 1, 0, 7616, 1, 0, 7632, 1, 0, 7648, 1, 0, 16320, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267513487384199_38_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267513487384199_38_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..414b7235 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267513487384199_38_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,211 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-uint records: op tag, ballot.x, ballot.y +RWStructuredBuffer<uint> _wave_op_index : register(u1); // [0] = next free index into _participant_bit; each record reserves 3 slots + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 5, 0, 896, 5, 0, 4480, 5, 0, 4480, 5, 0, 4928, 2, 0, 5760, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267513543963308_39_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267513543963308_39_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cfb1c8f1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267513543963308_39_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,108 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-uint records: op tag, ballot.x, ballot.y +RWStructuredBuffer<uint> _wave_op_index : register(u1); // [0] = next free index into _participant_bit; each record reserves 3 slots + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((51 << 6) | (counter0 << 4)) | (i1 << 2)) | counter2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 10, 0, 976, 10, 0, 992, 10, 0, 992, 10, 0, 3281, 4, 0, 3285, 4, 0, 3297, 4, 0, 3301, 4, 0, 4176, 10, 0, 4176, 10, 0, 4192, 10, 0, 4192, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267513640753222_40_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267513640753222_40_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..abc8e141 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267513640753222_40_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-uint records: op tag, ballot.x, ballot.y +RWStructuredBuffer<uint> _wave_op_index : register(u1); // [0] = next free index into _participant_bit; each record reserves 3 slots + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267513696958181_41_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267513696958181_41_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..70db4047 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267513696958181_41_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,391 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-uint records: op tag, ballot.x, ballot.y +RWStructuredBuffer<uint> _wave_op_index : register(u1); // [0] = next free index into _participant_bit; each record reserves 3 slots + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (i0 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((221 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (324 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 1984, 1, 0, 11776, 4, 0, 12688, 8, 0, 
12704, 8, 0, 12720, 8, 0, 19200, 1, 0, 19840, 8, 0, 20736, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267513787985798_42_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267513787985798_42_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4b9b667c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267513787985798_42_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,344 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((158 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((173 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result 
+ WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((290 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((307 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((316 << 6) | (i3 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((325 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 8064, 8, 0, 10132, 1, 0, 10136, 1, 0, 10148, 1, 0, 10152, 1, 0, 11092, 1, 0, 11096, 1, 0, 11108, 1, 0, 11112, 1, 0, 12544, 4, 0, 17088, 10, 0, 17088, 10, 0, 18560, 4, 0, 18576, 4, 0, 18592, 4, 0, 20224, 4, 0, 20240, 4, 0, 20256, 4, 0, 20800, 4, 0, 20816, 4, 0, 20832, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267513918201486_43_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267513918201486_43_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..55716b4c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267513918201486_43_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,153 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 
0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((78 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - 
Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 9, 0, 1296, 9, 0, 1312, 9, 0, 1312, 9, 0, 1936, 1, 0, 1952, 1, 0, 2640, 9, 0, 2640, 9, 0, 2656, 9, 0, 2656, 9, 0, 5968, 2, 0, 5984, 2, 0, 6272, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267513988414280_44_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267513988414280_44_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..436ba6bb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267513988414280_44_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,232 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if 
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 9, 0, 768, 9, 0, 6976, 2, 0, 6980, 2, 0, 6984, 2, 0, 6992, 2, 0, 6996, 2, 0, 7000, 2, 0, 7616, 2, 0, 7632, 2, 0, 9856, 4, 0, 11520, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267514057494936_45_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267514057494936_45_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..856ebb41 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267514057494936_45_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,166 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + 
Size: 3 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3648, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267514204604321_47_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267514204604321_47_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..78fe7ad3 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267514204604321_47_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,363 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((128 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if 
(((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((224 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result 
+ WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((234 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((260 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((271 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((280 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((299 << 6) | (counter4 << 4)) | (i5 << 2)) | i6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 
== 1)) { + continue; + } + if ((i6 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((309 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((318 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((331 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 147 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1920, 1, 0, 6272, 12, 0, 6272, 12, 0, 8208, 8, 0, 8212, 8, 0, 9088, 8, 0, 9728, 9, 0, 9728, 9, 0, 12672, 4, 0, 14352, 10, 0, 14352, 10, 0, 14356, 10, 0, 14356, 10, 0, 14368, 10, 0, 14368, 10, 0, 14372, 10, 0, 14372, 10, 0, 14992, 9, 0, 14992, 9, 0, 14996, 9, 0, 14996, 9, 0, 15008, 9, 0, 15008, 9, 0, 15012, 9, 0, 15012, 9, 0, 16656, 2, 0, 16660, 2, 0, 16672, 2, 0, 16676, 2, 0, 19152, 4, 0, 19153, 4, 0, 19156, 4, 0, 19157, 4, 0, 19168, 4, 0, 19169, 4, 0, 19172, 4, 0, 19173, 4, 0, 20368, 10, 0, 20368, 10, 0, 20372, 10, 0, 20372, 10, 0, 20384, 10, 0, 20384, 10, 0, 
20388, 10, 0, 20388, 10, 0, 21200, 5, 0, 21200, 5, 0, 21216, 5, 0, 21216, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267514565296746_48_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267514565296746_48_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b4cf5560 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267514565296746_48_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,328 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((99 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((282 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((291 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3840, 4, 0, 4880, 1, 0, 4896, 1, 0, 6352, 4, 0, 6356, 4, 0, 6368, 4, 0, 6372, 4, 0, 6992, 4, 0, 7008, 4, 0, 8000, 5, 0, 8000, 5, 0, 8896, 5, 0, 8896, 5, 0, 10112, 1, 0, 16576, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267514911323302_50_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267514911323302_50_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e58c0a82 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267514911323302_50_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,252 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((78 << 6) | (counter0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() 
% 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 
1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 99 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1488, 13, 0, 1488, 13, 0, 1488, 13, 0, 1504, 13, 0, 1504, 13, 0, 1504, 13, 0, 1520, 13, 0, 1520, 13, 0, 1520, 13, 0, 5012, 8, 0, 5016, 8, 0, 5020, 8, 0, 5028, 8, 0, 5032, 8, 0, 5036, 8, 0, 5044, 8, 0, 5048, 8, 0, 5052, 8, 0, 5968, 8, 0, 5984, 8, 0, 6000, 8, 0, 6928, 7, 0, 6928, 7, 0, 6928, 7, 0, 6944, 7, 0, 6944, 7, 0, 6944, 7, 0, 6960, 7, 0, 6960, 7, 0, 
6960, 7, 0, 7744, 9, 0, 7744, 9, 0, 14016, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267515077148144_51_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267515077148144_51_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..60ad49ab --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267515077148144_51_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,311 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((127 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { 
+ result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + 
WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((230 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((237 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 99 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1152, 1, 0, 2688, 4, 0, 4240, 1, 0, 4256, 1, 0, 4272, 1, 0, 4880, 1, 0, 4896, 1, 0, 4912, 1, 0, 6672, 8, 0, 6688, 8, 0, 6704, 8, 0, 9296, 8, 0, 9312, 8, 0, 9328, 8, 0, 10256, 9, 0, 10256, 9, 0, 10272, 9, 0, 10272, 9, 0, 10288, 9, 0, 10288, 9, 0, 14740, 4, 0, 14744, 4, 0, 14748, 4, 0, 14756, 4, 0, 14760, 4, 0, 14764, 4, 0, 14772, 4, 0, 14776, 4, 0, 14780, 4, 0, 
16320, 5, 0, 16320, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267515163700403_52_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267515163700403_52_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..917d69f6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267515163700403_52_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,345 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 
1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((105 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((131 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((150 << 6) | (counter3 << 4)) | (i4 << 2)) | i5); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((163 << 6) | (counter3 << 4)) | (i4 << 2)) | i5); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((181 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if 
((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((190 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((205 << 6) | (counter3 << 4)) | (i4 << 2)) | i6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 2)) { + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((217 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + if ((counter3 == 1)) { + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((258 << 6) | (counter7 << 4)); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 3)) { + counter8 = (counter8 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((280 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((295 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter7 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (307 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (324 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 153 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4928, 5, 0, 4928, 5, 0, 6736, 5, 0, 6736, 5, 0, 6740, 5, 0, 6740, 5, 0, 8400, 6, 0, 8400, 6, 0, 8404, 6, 0, 8404, 6, 0, 9616, 6, 0, 9616, 6, 0, 9617, 6, 0, 9617, 6, 0, 9618, 6, 0, 9618, 6, 0, 9620, 6, 0, 9620, 6, 0, 9621, 6, 0, 9621, 6, 0, 9622, 6, 0, 9622, 6, 0, 10448, 6, 0, 10448, 6, 0, 10449, 6, 0, 10449, 6, 0, 10450, 6, 0, 10450, 6, 0, 10452, 6, 0, 10452, 6, 0, 10453, 6, 0, 10453, 6, 0, 10454, 6, 0, 10454, 6, 0, 11600, 6, 0, 11600, 6, 0, 11604, 6, 0, 11604, 6, 0, 13136, 8, 0, 13137, 8, 0, 13138, 8, 0, 13140, 8, 0, 13141, 8, 0, 13142, 8, 0, 13904, 10, 0, 13904, 10, 0, 13908, 10, 0, 13908, 10, 0, 14912, 1, 0, 20288, 4, 0, 20736, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267515539229677_53_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267515539229677_53_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2b319397 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267515539229677_53_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,172 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + 
break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 6, 0, 1344, 6, 0, 960, 1, 0, 2496, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267515591784431_54_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267515591784431_54_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..28b72d6d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267515591784431_54_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,208 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + 
counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
} + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4992, 11, 0, 4992, 11, 0, 4992, 11, 0, 4352, 4, 0, 5632, 9, 0, 5632, 9, 0, 7376, 4, 0, 7392, 4, 0, 7408, 4, 0, 8528, 4, 0, 8544, 4, 0, 8560, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267515650880739_55_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267515650880739_55_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3545ec64 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267515650880739_55_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,262 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 
2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((89 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((218 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3456, 13, 0, 3456, 13, 0, 3456, 13, 0, 4608, 1, 0, 4624, 1, 0, 4640, 1, 0, 5696, 1, 0, 5700, 1, 0, 5704, 1, 0, 5712, 1, 0, 5716, 1, 0, 5720, 1, 0, 5728, 1, 0, 5732, 1, 0, 5736, 1, 0, 6272, 1, 0, 6288, 1, 0, 6304, 1, 0, 13264, 4, 0, 13280, 4, 0, 13296, 4, 0, 13968, 4, 0, 13984, 4, 0, 14000, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267515882292728_57_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267515882292728_57_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e483acf1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267515882292728_57_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,354 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
} + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 
+ 1); + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter3 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((305 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((321 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((338 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (347 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [10880, 8, 0, 10896, 8, 0, 13056, 8, 0, 13072, 8, 0, 16320, 4, 0, 19520, 1, 0, 19536, 1, 0, 20548, 1, 0, 20552, 1, 0, 20556, 1, 0, 20564, 1, 0, 20568, 1, 0, 20572, 1, 0, 21632, 1, 0, 21648, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267516019983494_58_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267516019983494_58_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cb0a0a7d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267516019983494_58_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,257 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if 
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 2368, 4, 0, 2384, 4, 0, 3456, 4, 0, 3460, 4, 0, 3464, 4, 0, 3472, 4, 0, 3476, 4, 0, 3480, 4, 0, 4160, 4, 0, 4176, 4, 0, 7424, 8, 0, 9408, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267516091947146_59_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267516091947146_59_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f98d161c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267516091947146_59_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,187 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((28 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((132 << 6) | (i2 << 4)) | (counter3 << 2)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1808, 1, 0, 1812, 1, 0, 1824, 1, 0, 1828, 1, 0, 2704, 1, 0, 2708, 1, 0, 2720, 1, 0, 2724, 1, 0, 3984, 1, 0, 3988, 1, 0, 4000, 1, 0, 4004, 1, 0, 9920, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: 
_wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267516198104947_60_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267516198104947_60_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c27218a6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267516198104947_60_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,318 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((96 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((112 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((138 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((145 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((152 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result 
= (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1472, 7, 0, 1472, 7, 0, 1472, 7, 0, 6160, 2, 0, 7188, 2, 0, 7192, 2, 0, 7196, 2, 0, 8852, 2, 0, 8856, 2, 0, 8860, 2, 0, 15296, 4, 0, 15744, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267516276400045_61_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267516276400045_61_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..05982579 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267516276400045_61_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,273 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result 
+ WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || 
(WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 4480, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267516379132125_63_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267516379132125_63_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8eef0d06 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267516379132125_63_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,128 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + if ((((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 4560, 4, 0, 4576, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + 
Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267516427303365_64_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267516427303365_64_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..18bb7289 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267516427303365_64_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,124 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + 
while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 1, 0, 1232, 1, 0, 1248, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267516484726654_65_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267516484726654_65_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4df9a38c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267516484726654_65_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,205 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() 
& 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1920, 1, 0, 2880, 4, 0, 6144, 9, 0, 6144, 9, 0, 8768, 9, 0, 8768, 9, 0, 9664, 6, 0, 9664, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267516545970776_66_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267516545970776_66_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ef283bdf --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267516545970776_66_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,244 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 2)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((205 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((212 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 9, 0, 1280, 9, 0, 2944, 1, 0, 4032, 9, 0, 4032, 9, 0, 14656, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267516600640739_67_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267516600640739_67_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..55da8033 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267516600640739_67_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,181 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { 
+ if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter1 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1216, 8, 0, 6464, 9, 0, 6464, 9, 0, 9728, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267516714637213_69_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267516714637213_69_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dcaad966 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267516714637213_69_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,149 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((117 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((128 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1408, 11, 0, 1408, 11, 0, 1408, 11, 0, 1424, 11, 0, 1424, 11, 0, 1424, 11, 0, 1440, 11, 0, 1440, 11, 0, 1440, 11, 0, 2624, 9, 0, 2624, 9, 0, 2628, 9, 0, 2628, 9, 0, 2632, 9, 0, 2632, 9, 0, 2640, 9, 0, 2640, 9, 0, 2644, 9, 0, 2644, 9, 0, 2648, 9, 0, 2648, 9, 0, 2656, 9, 0, 2656, 9, 0, 2660, 9, 0, 2660, 9, 0, 2664, 9, 0, 2664, 9, 0, 3584, 1, 0, 4736, 1, 0, 7492, 1, 0, 7496, 1, 0, 7508, 1, 0, 7512, 1, 0, 7524, 1, 0, 7528, 1, 0, 8196, 1, 0, 8200, 1, 0, 8212, 1, 0, 8216, 1, 0, 8228, 1, 0, 8232, 1, 0, 9088, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267516868298418_70_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267516868298418_70_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f1ec321b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267516868298418_70_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,434 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if 
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((75 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (counter0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((128 << 6) | (counter0 << 4)) | (i2 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((258 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (282 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((312 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((329 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter4 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (343 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (364 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((383 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (397 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (406 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((440 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((451 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((462 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (472 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (481 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1152, 1, 0, 1472, 4, 0, 7248, 1, 0, 7252, 1, 0, 7256, 1, 0, 7264, 1, 0, 7268, 1, 0, 7272, 1, 0, 7280, 1, 0, 7284, 1, 0, 7288, 1, 0, 8208, 1, 0, 8212, 1, 0, 8216, 1, 0, 8224, 1, 0, 8228, 1, 0, 8232, 1, 0, 8240, 1, 0, 8244, 1, 0, 8248, 1, 0, 19984, 4, 0, 19988, 4, 0, 19992, 4, 0, 21952, 4, 0, 23296, 8, 0, 
24512, 8, 0, 24528, 8, 0, 24544, 8, 0, 25408, 8, 0, 28160, 4, 0, 28176, 4, 0, 28192, 4, 0, 30208, 5, 0, 30208, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267517122070460_71_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267517122070460_71_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9c2253c2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267517122070460_71_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,137 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((94 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((105 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 9, 0, 1600, 9, 0, 2688, 3, 0, 2688, 3, 0, 3776, 5, 0, 3776, 5, 0, 4944, 8, 0, 4960, 8, 0, 4976, 8, 0, 6736, 8, 0, 6752, 8, 0, 6768, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267517199911727_72_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267517199911727_72_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..abc8e141 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267517199911727_72_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267517386067606_75_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267517386067606_75_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..223a2499 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267517386067606_75_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 10, 0, 1152, 10, 0, 1168, 10, 0, 1168, 10, 0, 1184, 10, 0, 1184, 10, 0, 3520, 9, 0, 3520, 9, 0, 3524, 9, 0, 3524, 9, 0, 3536, 9, 0, 3536, 9, 0, 3540, 9, 0, 3540, 9, 0, 3552, 9, 0, 3552, 9, 0, 3556, 9, 0, 3556, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267517511726709_76_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267517511726709_76_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..297611d3 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267517511726709_76_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,357 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((207 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((218 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((274 << 6) | (i3 
<< 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((283 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (292 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((310 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((334 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((345 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (352 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 3136, 1, 0, 4864, 4, 0, 9216, 8, 0, 10112, 1, 0, 21396, 4, 0, 21400, 4, 0, 21404, 4, 0, 21412, 4, 0, 21416, 4, 0, 21420, 4, 0, 21428, 4, 0, 21432, 4, 0, 21436, 4, 0, 22096, 4, 0, 22112, 4, 0, 22128, 4, 0, 22528, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267517618950016_77_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267517618950016_77_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4825d4d2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267517618950016_77_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,259 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = ((114 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((138 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (i1 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 
6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute 
+ Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2896, 1, 0, 2912, 1, 0, 2928, 1, 0, 3600, 1, 0, 3616, 1, 0, 3632, 1, 0, 5440, 1, 0, 6080, 5, 0, 6080, 5, 0, 7296, 8, 0, 7312, 8, 0, 7328, 8, 0, 7936, 2, 0, 7952, 2, 0, 7968, 2, 0, 9280, 2, 0, 9296, 2, 0, 9312, 2, 0, 10496, 8, 0, 10500, 8, 0, 10504, 8, 0, 10512, 8, 0, 10516, 8, 0, 10520, 8, 0, 10528, 8, 0, 10532, 8, 0, 10536, 8, 0, 12288, 2, 0, 12304, 2, 0, 12320, 2, 0, 15360, 11, 0, 15360, 11, 0, 15360, 11, 0, 14976, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267517792975876_78_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267517792975876_78_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f7491ab3 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267517792975876_78_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,186 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +/* Wave-participation tracking: after each wave reduction that is added to result, the lane reserves three consecutive uints of _participant_bit by calling InterlockedAdd on _wave_op_index[0], then stores an id word (a constant shifted left by 6, OR-ed with any enclosing loop counter) followed by ballot.x and ballot.y of WaveActiveBallot(1). */ [numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); /* incremented before use, so counter0 is 1..3 in the body; it is OR-ed into the id word shifted left by 4 */ + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { /* the selector is WaveGetLaneIndex() % 2 and cases 0 and 1 are both present, so this arm is not expected to be selected */ + result = (result + 
WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 4, 0, 2960, 8, 0, 2976, 8, 0, 2992, 8, 0, 3664, 8, 0, 3680, 8, 0, 3696, 8, 0, 4224, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267517856125721_79_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267517856125721_79_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e7c516b0 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267517856125721_79_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,336 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +/* Wave-participation tracking: after each wave reduction that is added to result, the lane reserves three consecutive uints of _participant_bit by calling InterlockedAdd on _wave_op_index[0], then stores an id word (a constant shifted left by 6, OR-ed with any enclosing loop counter) followed by ballot.x and ballot.y of WaveActiveBallot(1). */ [numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { /* i0 is OR-ed into every id word written inside this loop, shifted left by 4 */ + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); /* incremented before use, so counter1 is 1..2 in the body */ + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + break; + } + default: { /* the selector is WaveGetLaneIndex() % 3 and cases 0, 1 and 2 are all present, so this arm is not expected to be selected */ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 8, 0, 1232, 8, 0, 1248, 8, 0, 4032, 1, 0, 4048, 1, 0, 4064, 1, 0, 9280, 2, 0, 12112, 4, 0, 12128, 4, 0, 13456, 4, 0, 13472, 4, 0, 14992, 4, 0, 15008, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267517947481514_80_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267517947481514_80_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f65d589d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267517947481514_80_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,373 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +/* Wave-participation tracking: after each wave reduction that is added to result, the lane reserves three consecutive uints of _participant_bit by calling InterlockedAdd on _wave_op_index[0], then stores an id word (a constant shifted left by 6, OR-ed with any enclosing loop counters) followed by ballot.x and ballot.y of WaveActiveBallot(1). */ [numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch 
((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); /* inside nested loops the outer counter is OR-ed in shifted left by 4 and the inner counter shifted left by 2 */ + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((133 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((143 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((150 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((157 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 2)) { + /* counter1 is incremented at the top of the body, so this leaves the inner loop at the end of its second pass */ break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + /* leaves the outer loop at the end of its first pass */ break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((215 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((241 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((256 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((273 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((288 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((295 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 3)) { + for (uint i4 = 0; (i4 < 
3); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((324 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((344 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((355 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5312, 4, 0, 5760, 8, 0, 10960, 1, 0, 13780, 4, 0, 13784, 4, 0, 13796, 4, 0, 13800, 4, 0, 17492, 4, 0, 17496, 4, 0, 17508, 4, 0, 17512, 4, 0, 18452, 4, 0, 18456, 4, 0, 18468, 4, 0, 18472, 4, 0, 18896, 4, 0, 18912, 4, 0, 20736, 8, 0, 20752, 8, 0, 20768, 8, 0, 22016, 8, 0, 22032, 8, 0, 22048, 8, 0, 22720, 8, 0, 22736, 8, 0, 22752, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + 
GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267518075658513_81_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267518075658513_81_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ae44ce54 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267518075658513_81_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,287 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +/* Wave-participation tracking: after each wave reduction that is added to result, the lane reserves three consecutive uints of _participant_bit by calling InterlockedAdd on _wave_op_index[0], then stores an id word (a constant shifted left by 6, OR-ed with any enclosing loop counter) followed by ballot.x and ballot.y of WaveActiveBallot(1). */ [numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } /* NOTE(review): case 0 has no break, so control continues into case 1; this looks intentional, since expected_bit_patterns lists id 2240 (35 << 6) with ballot 3 - confirm before adding a break */ + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { /* the selector is WaveGetLaneIndex() % 4 and cases 0 through 3 are all present, so this arm is not expected to be selected */ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result 
+ WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + /* last statement of the loop body, so this continue skips nothing */ continue; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2240, 3, 0, 2240, 3, 0, 2880, 1, 0, 4224, 8, 0, 4928, 4, 0, 6016, 4, 0, 6032, 4, 0, 8128, 9, 0, 8128, 9, 0, 9984, 1, 0, 14208, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267518162844961_82_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267518162844961_82_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..656a2182 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267518162844961_82_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,429 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 1: { + for (uint i5 = 0; (i5 < 
3); i5 = (i5 + 1)) { + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((244 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((254 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((263 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((268 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((counter6 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + } + case 2: { + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((312 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i8 = 0; (i8 < 2); i8 = (i8 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((329 << 6) | (counter7 << 4)) | (i8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((340 << 6) | (counter7 << 4)) | (i8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((349 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((359 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((368 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((375 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((386 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (396 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (405 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (410 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (414 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 87 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 5, 0, 1600, 5, 0, 3008, 1, 0, 3024, 1, 0, 3040, 1, 0, 4928, 4, 0, 5888, 5, 0, 5888, 5, 0, 7360, 1, 0, 7376, 1, 0, 7392, 1, 0, 9024, 1, 0, 9040, 1, 0, 9056, 1, 0, 9600, 1, 0, 9616, 1, 0, 9632, 1, 0, 18304, 2, 0, 18320, 2, 0, 18336, 2, 0, 19984, 2, 0, 20000, 2, 0, 22352, 2, 0, 22368, 2, 0, 22992, 4, 0, 23008, 4, 0, 25344, 9, 0, 25344, 9, 0, 26240, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267518313399115_83_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267518313399115_83_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0af57f4a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267518313399115_83_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,451 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((83 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((99 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (i1 << 4)) | (i2 << 2)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i1 == 1)) { + continue; + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((141 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((151 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((160 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (counter3 << 4)) | 
(counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((175 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((238 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((247 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + 
result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (290 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((322 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (334 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i8 = 0; (i8 < 2); i8 = (i8 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((351 << 6) | (i8 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i9 = 0; (i9 < 3); i9 = (i9 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((370 << 6) | (i8 << 4)) | (i9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (375 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 2320, 2, 0, 2336, 2, 0, 2624, 4, 0, 3072, 8, 0, 5312, 1, 0, 5316, 1, 0, 5320, 1, 0, 5328, 1, 0, 5332, 1, 0, 5336, 1, 0, 11220, 2, 0, 11224, 2, 0, 11236, 2, 0, 11240, 2, 0, 12928, 4, 0, 19136, 8, 0, 24000, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267518609069675_84_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267518609069675_84_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ee42925f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267518609069675_84_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,308 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = 
(result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((90 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((119 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (((126 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + continue; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (i2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((235 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((i2 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 10, 0, 1088, 10, 0, 2880, 8, 0, 3584, 2, 0, 4544, 1, 0, 4560, 1, 0, 5760, 5, 0, 5760, 5, 0, 5764, 5, 0, 5764, 5, 0, 5776, 5, 0, 5776, 5, 0, 5780, 5, 0, 5780, 5, 0, 6912, 1, 0, 6916, 1, 0, 6928, 1, 0, 6932, 1, 0, 7616, 1, 0, 7620, 1, 0, 7632, 1, 0, 7636, 1, 0, 8064, 4, 0, 8068, 4, 0, 8080, 4, 0, 8084, 4, 0, 8896, 5, 0, 8896, 5, 0, 11008, 2, 0, 11024, 2, 0, 13056, 8, 0, 13072, 8, 0, 15040, 8, 0, 15056, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 
0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267518871671679_86_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267518871671679_86_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..64ab25f2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267518871671679_86_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,839 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + 
WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((260 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((273 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((306 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (313 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
+ break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (343 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (352 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (357 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (361 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(376 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (385 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((399 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((406 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((440 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (((455 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((469 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (481 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (507 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((529 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) 
|| (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((544 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (561 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 1)) { + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((579 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((586 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (593 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (603 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (613 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (622 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter8 = 0; + while ((counter8 < 3)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((640 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter9 = 0; + while ((counter9 < 3)) { + counter9 = (counter9 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((658 << 6) | (counter8 << 4)) | (counter9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((671 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (687 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (697 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (715 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (726 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (735 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (740 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (750 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (761 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (770 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (780 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (790 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (799 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (804 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (808 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (815 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (819 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 
0 + Size: 99 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2368, 1, 0, 10576, 2, 0, 10592, 2, 0, 12048, 2, 0, 12064, 2, 0, 13136, 2, 0, 13152, 2, 0, 13824, 2, 0, 14144, 4, 0, 15488, 8, 0, 16656, 8, 0, 16672, 8, 0, 16688, 8, 0, 17488, 8, 0, 17504, 8, 0, 17520, 8, 0, 18176, 8, 0, 22848, 4, 0, 28160, 1, 0, 28164, 1, 0, 28168, 1, 0, 28176, 1, 0, 28180, 1, 0, 28184, 1, 0, 29120, 1, 0, 29124, 1, 0, 29128, 1, 0, 29136, 1, 0, 29140, 1, 0, 29144, 1, 0, 30784, 1, 0, 38592, 9, 0, 38592, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267519159642989_87_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267519159642989_87_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d669d046 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267519159642989_87_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,226 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if 
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((153 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((160 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
} + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 51 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2388, 1, 0, 2392, 1, 0, 2396, 1, 0, 3092, 1, 0, 3096, 1, 0, 3100, 1, 0, 4864, 1, 0, 5184, 7, 0, 5184, 7, 0, 5184, 7, 0, 5632, 15, 0, 5632, 15, 0, 5632, 15, 0, 5632, 15, 0, 8912, 2, 0, 8928, 2, 0, 8944, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267519673489194_90_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267519673489194_90_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..afa75d9e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267519673489194_90_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,379 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((77 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = 
(result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((273 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + 
if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((295 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((315 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((325 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((334 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((339 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((354 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (358 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3136, 1, 0, 3152, 1, 0, 3168, 1, 0, 5824, 1, 0, 5840, 1, 0, 5856, 1, 0, 6976, 1, 0, 8448, 5, 0, 8448, 5, 0, 9344, 10, 0, 9344, 10, 0, 10640, 10, 0, 10640, 10, 0, 10656, 10, 0, 10656, 10, 0, 16320, 5, 0, 16320, 5, 0, 17488, 2, 0, 17504, 2, 0, 17520, 2, 0, 18320, 8, 0, 18336, 8, 0, 18352, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267519840662710_91_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267519840662710_91_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2098e850 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267519840662710_91_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,293 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if 
(((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + continue; + } + } + break; + } + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((119 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((130 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((155 << 6) | (counter2 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((165 << 6) | (counter2 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((172 << 6) | (counter2 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((183 << 6) | (counter2 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((194 << 6) | (counter2 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((234 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((244 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((253 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((258 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((265 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 294 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 4, 0, 1104, 4, 0, 1120, 4, 0, 2304, 8, 0, 2320, 8, 0, 2336, 8, 0, 5520, 13, 0, 5520, 13, 0, 5520, 13, 0, 5536, 13, 0, 5536, 13, 0, 5536, 13, 0, 9940, 6, 0, 9940, 6, 0, 9944, 6, 0, 9944, 6, 0, 9956, 6, 0, 9956, 6, 0, 9960, 6, 0, 9960, 6, 0, 11732, 8, 0, 11736, 8, 0, 11748, 8, 0, 11752, 8, 0, 12436, 6, 0, 12436, 6, 0, 12440, 6, 0, 12440, 6, 0, 12452, 6, 0, 12452, 6, 0, 12456, 6, 0, 12456, 6, 0, 13136, 13, 0, 13136, 13, 0, 13136, 13, 0, 13152, 13, 0, 13152, 13, 0, 13152, 13, 0, 14996, 11, 0, 14996, 
11, 0, 14996, 11, 0, 15000, 11, 0, 15000, 11, 0, 15000, 11, 0, 15004, 11, 0, 15004, 11, 0, 15004, 11, 0, 15012, 11, 0, 15012, 11, 0, 15012, 11, 0, 15016, 11, 0, 15016, 11, 0, 15016, 11, 0, 15020, 11, 0, 15020, 11, 0, 15020, 11, 0, 15028, 11, 0, 15028, 11, 0, 15028, 11, 0, 15032, 11, 0, 15032, 11, 0, 15032, 11, 0, 15036, 11, 0, 15036, 11, 0, 15036, 11, 0, 15636, 9, 0, 15636, 9, 0, 15640, 9, 0, 15640, 9, 0, 15644, 9, 0, 15644, 9, 0, 15652, 9, 0, 15652, 9, 0, 15656, 9, 0, 15656, 9, 0, 15660, 9, 0, 15660, 9, 0, 15668, 9, 0, 15668, 9, 0, 15672, 9, 0, 15672, 9, 0, 15676, 9, 0, 15676, 9, 0, 16532, 4, 0, 16536, 4, 0, 16540, 4, 0, 16548, 4, 0, 16552, 4, 0, 16556, 4, 0, 16564, 4, 0, 16568, 4, 0, 16572, 4, 0, 16976, 3, 0, 16976, 3, 0, 16992, 3, 0, 16992, 3, 0, 17008, 3, 0, 17008, 3, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267520416813278_92_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267520416813278_92_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dbac8885 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267520416813278_92_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,152 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if 
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (counter0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((99 << 6) | (counter0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- 
+Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 87 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1488, 8, 0, 1504, 8, 0, 1520, 8, 0, 4368, 8, 0, 4384, 8, 0, 4400, 8, 0, 5524, 8, 0, 5528, 8, 0, 5532, 8, 0, 5540, 8, 0, 5544, 8, 0, 5548, 8, 0, 5556, 8, 0, 5560, 8, 0, 5564, 8, 0, 6356, 8, 0, 6360, 8, 0, 6364, 8, 0, 6372, 8, 0, 6376, 8, 0, 6380, 8, 0, 6388, 8, 0, 6392, 8, 0, 6396, 8, 0, 6800, 8, 0, 6816, 8, 0, 6832, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267520568559446_93_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267520568559446_93_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..761c0f7f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267520568559446_93_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,143 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2944, 1, 0, 3584, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + 
Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267520625891479_94_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267520625891479_94_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..16fb4aea --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267520625891479_94_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,150 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267520676521637_95_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267520676521637_95_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..17d0f9e4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267520676521637_95_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,263 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if 
(((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + 
Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1792, 8, 0, 1808, 8, 0, 4480, 8, 0, 4496, 8, 0, 6080, 8, 0, 6096, 8, 0, 6976, 5, 0, 6976, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267520761146334_96_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267520761146334_96_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3de1e3fd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267520761146334_96_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267520802534816_97_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267520802534816_97_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..afe73e13 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267520802534816_97_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,415 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((89 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((104 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((144 << 6) | (i2 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((155 << 6) | (i2 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = ((167 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((221 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((237 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((246 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((255 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (336 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (345 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (358 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((374 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((385 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (396 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 129 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 1, 0, 1056, 1, 0, 1072, 1, 0, 2000, 1, 0, 2004, 1, 0, 2016, 1, 0, 2020, 1, 0, 2032, 1, 0, 2036, 1, 0, 2448, 1, 0, 2464, 1, 0, 2480, 
1, 0, 4288, 1, 0, 4304, 1, 0, 5700, 1, 0, 5704, 1, 0, 5708, 1, 0, 5716, 1, 0, 5720, 1, 0, 5724, 1, 0, 7360, 1, 0, 7376, 1, 0, 11264, 1, 0, 11280, 1, 0, 12160, 4, 0, 15184, 8, 0, 15188, 8, 0, 15192, 8, 0, 15200, 8, 0, 15204, 8, 0, 15208, 8, 0, 16336, 8, 0, 16352, 8, 0, 17408, 9, 0, 17408, 9, 0, 20416, 1, 0, 22912, 4, 0, 23952, 4, 0, 23968, 4, 0, 23984, 4, 0, 24656, 4, 0, 24672, 4, 0, 24688, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267521209009847_100_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267521209009847_100_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3617bf00 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267521209009847_100_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,390 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } 
+ case 1: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((181 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((190 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() & 1) == 0)) { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (282 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (304 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (313 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (333 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((350 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + break; + } + default: { + result = 
(result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (354 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 117 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2304, 1, 0, 2320, 1, 0, 2336, 1, 0, 2944, 1, 0, 2960, 1, 0, 2976, 1, 0, 4992, 9, 0, 4992, 9, 0, 5008, 9, 0, 5008, 9, 0, 5024, 9, 0, 5024, 9, 0, 7104, 1, 0, 7120, 1, 0, 7136, 1, 0, 7872, 4, 0, 9024, 6, 0, 9024, 6, 0, 11604, 4, 0, 11608, 4, 0, 11620, 4, 0, 11624, 4, 0, 11636, 4, 0, 11640, 4, 0, 12180, 4, 0, 12184, 4, 0, 12196, 4, 0, 12200, 4, 0, 12212, 4, 0, 12216, 4, 0, 12624, 4, 0, 12640, 4, 0, 12656, 4, 0, 14608, 1, 0, 14624, 1, 0, 14640, 1, 0, 15296, 9, 0, 15296, 9, 0, 21312, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267521340685755_102_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267521340685755_102_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e339ebc3 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267521340685755_102_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 4, 0, 1920, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267521392076230_103_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267521392076230_103_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a3f7fdba --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267521392076230_103_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,108 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2176, 4, 0, 2816, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267521437923634_104_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267521437923634_104_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1d59439e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267521437923634_104_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,238 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((13 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((32 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((95 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 93 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [848, 4, 0, 864, 4, 0, 880, 4, 0, 2064, 9, 0, 2064, 9, 0, 2068, 9, 0, 2068, 9, 0, 2072, 9, 0, 2072, 9, 0, 2080, 9, 0, 2080, 9, 0, 2084, 9, 0, 2084, 9, 0, 2088, 9, 0, 2088, 9, 0, 2096, 9, 0, 2096, 9, 0, 2100, 9, 0, 2100, 9, 0, 2104, 9, 0, 2104, 9, 0, 2512, 2, 0, 2528, 2, 0, 2544, 2, 0, 3328, 1, 0, 6912, 4, 0, 6916, 4, 0, 6920, 4, 0, 6928, 4, 0, 6932, 4, 0, 6936, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267521575095026_105_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267521575095026_105_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c28ea88d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267521575095026_105_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3200, 1, 0, 2816, 8, 0, 2432, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267521621732129_106_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267521621732129_106_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4464354e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267521621732129_106_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,704 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 
1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = ((122 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (335 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (346 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (353 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 3)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (369 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (378 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (383 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (390 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (394 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (405 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (415 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (424 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((449 << 6) | (counter3 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((460 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((471 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (489 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((511 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (518 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (528 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (535 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (548 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (558 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (565 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || 
(WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((587 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((602 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (609 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((627 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((644 << 6) | (counter6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((658 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (663 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter8 = 0; + while ((counter8 < 3)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((681 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((691 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((702 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads 
+Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 4800, 2, 0, 8896, 2, 0, 9216, 4, 0, 10368, 8, 0, 19072, 8, 0, 24960, 8, 0, 26560, 1, 0, 31296, 4, 0, 32720, 4, 0, 32736, 4, 0, 36160, 1, 0, 37584, 1, 0, 37600, 1, 0, 38544, 1, 0, 38560, 1, 0, 42432, 4, 0, 43600, 8, 0, 43616, 8, 0, 43632, 8, 0, 44240, 8, 0, 44256, 8, 0, 44272, 8, 0, 44944, 8, 0, 44960, 8, 0, 44976, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267521900026369_107_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267521900026369_107_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e8d90de9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267521900026369_107_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,274 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i0 = 0; (i0 < 3); i0 = (i0 
+ 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 4, 0, 1920, 8, 0, 2560, 5, 0, 2560, 5, 0, 4032, 8, 0, 7936, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267521969620524_108_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267521969620524_108_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fe239e2b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267521969620524_108_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,227 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 
3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | 
(counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1360, 13, 0, 1360, 13, 0, 1360, 13, 0, 1376, 13, 0, 1376, 13, 0, 1376, 13, 0, 4880, 5, 0, 4880, 5, 0, 4896, 5, 0, 4896, 5, 0, 5584, 8, 0, 5600, 8, 0, 7120, 2, 0, 7136, 2, 0, 8272, 8, 0, 8288, 8, 0, 9680, 10, 0, 9680, 10, 0, 9696, 10, 0, 9696, 10, 0, 10640, 13, 0, 10640, 13, 0, 10640, 13, 0, 10656, 13, 0, 10656, 13, 0, 10656, 13, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267522068309956_109_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267522068309956_109_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1b93a8d6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267522068309956_109_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,180 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if 
((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2644, 2, 0, 2648, 2, 0, 2652, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267522122442880_110_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267522122442880_110_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4ec48689 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267522122442880_110_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,186 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if 
(((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 5760, 4, 0, 6208, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267522168655263_111_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267522168655263_111_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..21083692 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267522168655263_111_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,374 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + 
if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((295 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((304 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (313 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2048, 9, 0, 2048, 9, 0, 6976, 2, 0, 8576, 4, 0, 9280, 4, 0, 13888, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267522227395730_112_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267522227395730_112_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f8f082fa --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267522227395730_112_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,158 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + 
result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4352, 2, 0, 5312, 13, 0, 5312, 13, 0, 5312, 13, 0, 5952, 5, 0, 5952, 5, 0, 7488, 5, 0, 7488, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267522277361214_113_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267522277361214_113_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fbbdeb3e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267522277361214_113_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,328 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((177 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + 
WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((301 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 57 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2496, 10, 0, 2496, 10, 0, 2240, 1, 0, 1856, 4, 0, 4160, 1, 0, 8960, 5, 0, 8960, 5, 0, 11344, 5, 0, 11344, 5, 0, 11348, 5, 0, 11348, 5, 0, 11360, 5, 0, 11360, 5, 0, 11364, 5, 0, 11364, 5, 0, 11376, 5, 0, 11376, 5, 0, 11380, 5, 0, 11380, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267522499144657_115_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267522499144657_115_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fbd61102 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267522499144657_115_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,416 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + 
while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(7)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 
2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((285 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((308 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((320 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (331 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (349 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (359 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (368 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (373 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (380 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (391 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 5824, 1, 0, 7040, 1, 0, 7056, 1, 0, 10112, 2, 0, 10128, 2, 0, 12096, 4, 0, 16512, 6, 0, 16512, 6, 0, 20496, 1, 0, 20512, 1, 0, 21184, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267522598806537_116_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267522598806537_116_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..553a06da --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267522598806537_116_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,243 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((209 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((224 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1728, 1, 0, 1744, 1, 0, 9856, 2, 0, 10880, 4, 0, 13392, 8, 0, 13396, 8, 0, 13400, 8, 0, 13408, 8, 0, 13412, 8, 0, 13416, 8, 0, 14352, 8, 0, 14356, 8, 0, 14360, 8, 0, 14368, 8, 0, 14372, 8, 0, 14376, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267522672848072_117_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267522672848072_117_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..642fe930 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267522672848072_117_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,303 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); 
i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((76 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((249 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { 
+ for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((267 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((277 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4416, 8, 0, 4420, 8, 0, 4432, 8, 0, 4436, 8, 0, 4448, 8, 0, 4452, 8, 0, 6144, 8, 0, 6160, 8, 0, 6176, 8, 0, 6848, 5, 0, 6848, 5, 0, 7488, 1, 0, 8128, 1, 0, 10304, 9, 0, 10304, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267523053880555_118_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267523053880555_118_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3ab84fd3 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267523053880555_118_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,121 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((32 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if 
((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 1, 0, 912, 1, 0, 2052, 5, 0, 2052, 5, 0, 2056, 5, 0, 2056, 5, 0, 2060, 5, 0, 2060, 5, 0, 2068, 5, 0, 2068, 5, 0, 2072, 5, 0, 2072, 5, 0, 2076, 5, 0, 2076, 5, 0, 3716, 1, 0, 3720, 1, 0, 3724, 1, 0, 3732, 1, 0, 3736, 1, 0, 3740, 1, 0, 4740, 5, 0, 4740, 5, 0, 4744, 5, 0, 4744, 5, 0, 4748, 5, 0, 4748, 5, 0, 4756, 5, 0, 4756, 5, 0, 4760, 5, 0, 4760, 5, 0, 4764, 5, 0, 4764, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267523235293339_119_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267523235293339_119_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ef9f5a3c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267523235293339_119_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,94 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1216, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267523356378585_121_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267523356378585_121_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..257e496e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267523356378585_121_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,376 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 
= 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if 
(((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((178 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((188 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((197 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((202 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((209 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((counter2 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((312 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((321 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (326 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 1, 0, 1360, 
1, 0, 3008, 1, 0, 3024, 1, 0, 3712, 8, 0, 3728, 8, 0, 4416, 8, 0, 4432, 8, 0, 4992, 8, 0, 5008, 8, 0, 7680, 4, 0, 7696, 4, 0, 8128, 4, 0, 8144, 4, 0, 8768, 1, 0, 11408, 1, 0, 11412, 1, 0, 11416, 1, 0, 12048, 1, 0, 12052, 1, 0, 12056, 1, 0, 16000, 1, 0, 20864, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267523584692048_123_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267523584692048_123_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..de75b9e4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267523584692048_123_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,117 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 6, 0, 1472, 6, 0, 4096, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267523632304459_124_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267523632304459_124_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ea7a0249 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267523632304459_124_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,114 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 5, 0, 1344, 5, 0, 1088, 8, 0, 1984, 9, 0, 1984, 9, 0, 2880, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267523679382536_125_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267523679382536_125_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8da0aadb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267523679382536_125_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,332 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((111 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + 
result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((235 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((273 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((282 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((293 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((304 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2240, 9, 0, 2240, 9, 0, 6144, 2, 0, 6160, 2, 0, 10128, 4, 0, 10144, 4, 0, 15056, 
8, 0, 16016, 12, 0, 16016, 12, 0, 16848, 1, 0, 17488, 1, 0, 18768, 1, 0, 19472, 9, 0, 19472, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267523767699840_126_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267523767699840_126_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..29138af0 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267523767699840_126_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,140 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((82 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 4, 0, 2624, 4, 0, 3792, 1, 0, 3808, 1, 0, 3824, 1, 0, 5264, 9, 0, 5264, 9, 0, 5268, 9, 0, 5268, 9, 0, 5280, 9, 0, 5280, 9, 0, 5284, 9, 0, 5284, 9, 0, 5296, 9, 0, 5296, 9, 0, 5300, 9, 0, 5300, 9, 0, 6336, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267523846008726_127_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267523846008726_127_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..08cbdc64 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267523846008726_127_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,247 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() 
< 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((104 << 6) | (i0 << 4)) | 
(counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((119 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 2))) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((175 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 75 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1664, 1, 0, 1680, 1, 0, 2816, 1, 0, 2832, 1, 0, 3456, 1, 0, 3472, 1, 0, 6660, 2, 0, 6664, 2, 0, 6668, 2, 0, 6676, 2, 0, 6680, 2, 0, 6684, 2, 0, 7620, 2, 0, 7624, 2, 0, 7628, 2, 0, 7636, 2, 0, 7640, 2, 0, 7644, 2, 0, 8640, 2, 0, 8656, 2, 0, 8960, 4, 0, 11792, 8, 0, 11808, 8, 0, 11824, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267524182862047_129_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267524182862047_129_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..72b3f3b1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267524182862047_129_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,337 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((30 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((267 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((281 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((299 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1936, 1, 0, 1940, 1, 0, 1944, 1, 0, 1952, 1, 0, 1956, 1, 0, 1960, 1, 0, 2640, 1, 0, 2644, 1, 0, 2648, 1, 0, 2656, 1, 0, 2660, 1, 0, 2664, 1, 0, 6336, 2, 0, 6656, 4, 0, 7808, 8, 0, 9792, 8, 0, 10496, 8, 0, 12864, 1, 0, 14080, 4, 0, 14528, 4, 0, 17088, 11, 0, 17088, 11, 0, 17088, 11, 0, 17092, 11, 0, 17092, 11, 0, 17092, 11, 0, 17096, 11, 0, 17096, 11, 0, 17096, 11, 0, 17104, 11, 0, 17104, 11, 0, 17104, 11, 0, 17108, 11, 0, 17108, 11, 0, 17108, 11, 0, 17112, 11, 0, 17112, 11, 0, 17112, 11, 0, 17984, 10, 0, 17984, 10, 0, 17988, 10, 0, 17988, 10, 0, 17992, 10, 0, 17992, 10, 0, 18000, 10, 
0, 18000, 10, 0, 18004, 10, 0, 18004, 10, 0, 18008, 10, 0, 18008, 10, 0, 19136, 5, 0, 19136, 5, 0, 19152, 5, 0, 19152, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267524740535209_131_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267524740535209_131_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dcd259ea --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267524740535209_131_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,198 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((32 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 
1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((117 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((124 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 5, 0, 1152, 5, 0, 1168, 5, 0, 1168, 5, 0, 1184, 5, 0, 1184, 5, 0, 2752, 10, 0, 2752, 10, 0, 2768, 10, 0, 2768, 10, 0, 2784, 10, 0, 2784, 10, 0, 3392, 5, 0, 3392, 5, 0, 4608, 1, 0, 6400, 4, 0, 6416, 4, 0, 7488, 4, 0, 7492, 4, 0, 7496, 4, 0, 7504, 4, 0, 7508, 4, 0, 7512, 4, 0, 9280, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267524825752402_132_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267524825752402_132_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..837965d8 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267524825752402_132_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,149 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 1, 0, 1616, 1, 0, 2240, 1, 0, 2256, 1, 0, 2816, 1, 0, 2832, 1, 0, 3136, 1, 0, 3152, 1, 0, 3584, 8, 0, 3600, 8, 0, 5248, 9, 0, 5248, 9, 0, 5264, 9, 0, 5264, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267524950672256_134_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267524950672256_134_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..104bd8c4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267524950672256_134_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,249 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((144 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((151 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 
3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 63 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 4880, 2, 0, 4896, 2, 0, 5840, 2, 0, 5856, 2, 0, 6784, 4, 0, 7232, 8, 0, 9236, 1, 0, 9240, 1, 0, 9252, 1, 0, 9256, 1, 0, 9684, 1, 0, 9688, 1, 0, 9700, 1, 0, 9704, 1, 0, 11216, 14, 0, 11216, 14, 0, 11216, 14, 0, 11232, 14, 0, 11232, 14, 0, 11232, 14, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267525045822984_135_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267525045822984_135_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4f96d2ae --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267525045822984_135_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,141 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 5, 0, 1296, 5, 0, 1312, 5, 0, 1312, 5, 0, 2452, 5, 0, 2452, 5, 0, 
2456, 5, 0, 2456, 5, 0, 2460, 5, 0, 2460, 5, 0, 2468, 5, 0, 2468, 5, 0, 2472, 5, 0, 2472, 5, 0, 2476, 5, 0, 2476, 5, 0, 3156, 1, 0, 3160, 1, 0, 3164, 1, 0, 3172, 1, 0, 3176, 1, 0, 3180, 1, 0, 4096, 8, 0, 4112, 8, 0, 4128, 8, 0, 6656, 2, 0, 6672, 2, 0, 6688, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267525166026753_136_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267525166026753_136_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5648c566 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267525166026753_136_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,233 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = 
(counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((39 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + 
result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 1, 0, 1104, 1, 0, 1120, 1, 0, 2500, 9, 0, 2500, 9, 0, 2504, 9, 0, 2504, 9, 0, 2508, 9, 0, 2508, 9, 0, 2516, 9, 0, 2516, 9, 0, 2520, 9, 0, 2520, 9, 0, 2524, 9, 0, 2524, 9, 0, 2532, 9, 0, 2532, 9, 0, 2536, 9, 0, 2536, 9, 0, 2540, 9, 0, 2540, 9, 0, 8064, 4, 0, 9472, 1, 0, 9488, 1, 0, 9504, 1, 0, 10944, 1, 0, 10960, 1, 0, 10976, 1, 0, 11520, 1, 0, 11536, 1, 0, 11552, 1, 0, 12160, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267525283577111_137_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267525283577111_137_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6510d379 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267525283577111_137_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,356 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) 
== 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + 
WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((266 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((280 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((289 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((304 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((324 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((343 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((353 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (360 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 2768, 2, 0, 2784, 2, 0, 2800, 2, 0, 6464, 4, 0, 9152, 4, 0, 13312, 8, 0, 13952, 1, 0, 15376, 2, 0, 19472, 2, 0, 21952, 4, 0, 21956, 4, 0, 21960, 4, 0, 21968, 4, 0, 21972, 4, 0, 21976, 4, 0, 21984, 4, 0, 21988, 4, 0, 21992, 4, 0, 23040, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + 
Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267525405507920_138_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267525405507920_138_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9d02e8cb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267525405507920_138_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,172 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (i0 << 4)) 
| (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((116 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((126 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((133 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((140 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3584, 8, 0, 3588, 8, 0, 3600, 8, 0, 3604, 8, 0, 4160, 8, 0, 4164, 8, 0, 4176, 8, 0, 4180, 8, 0, 5568, 7, 0, 5568, 7, 0, 5568, 7, 0, 5584, 7, 0, 5584, 7, 0, 5584, 7, 0, 6480, 1, 0, 6496, 1, 0, 7440, 8, 0, 7444, 8, 0, 7456, 8, 0, 7460, 8, 0, 8528, 1, 0, 8532, 1, 0, 8544, 1, 0, 8548, 1, 0, 8976, 4, 0, 8980, 4, 0, 8992, 4, 0, 8996, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - 
Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267525837580642_140_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267525837580642_140_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ab21f55f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267525837580642_140_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,211 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + 
} + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((89 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 6, 0, 1472, 6, 0, 1920, 8, 0, 3072, 5, 0, 3072, 5, 0, 4624, 5, 0, 4624, 5, 0, 4640, 5, 0, 4640, 5, 0, 5264, 1, 0, 5280, 1, 0, 6672, 5, 0, 6672, 5, 0, 6688, 5, 0, 6688, 5, 0, 8192, 10, 0, 8192, 10, 0, 9408, 6, 0, 9408, 6, 0, 9424, 6, 0, 9424, 6, 0, 9440, 6, 0, 9440, 6, 0, 10560, 9, 0, 10560, 9, 0, 10576, 9, 0, 10576, 9, 0, 10592, 9, 0, 10592, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267525937099077_141_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267525937099077_141_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1351b39b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267525937099077_141_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,149 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((91 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((98 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) 
|| (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 117 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 9, 0, 1152, 9, 0, 1168, 9, 0, 1168, 9, 0, 1184, 9, 0, 1184, 9, 0, 2304, 9, 0, 2304, 9, 0, 2320, 9, 0, 2320, 9, 0, 2336, 9, 0, 2336, 9, 0, 3456, 9, 0, 3456, 9, 0, 3472, 9, 0, 3472, 9, 0, 3488, 9, 0, 3488, 9, 0, 4160, 9, 0, 4160, 9, 0, 4176, 9, 0, 4176, 9, 0, 4192, 9, 0, 4192, 9, 0, 6272, 2, 0, 6276, 2, 0, 6288, 2, 0, 6292, 2, 0, 6304, 2, 0, 6308, 2, 0, 7936, 2, 0, 7952, 2, 0, 7968, 2, 0, 8640, 9, 0, 8640, 9, 0, 8656, 9, 0, 8656, 9, 0, 8672, 9, 0, 8672, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267526119220716_142_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267526119220716_142_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..997425de --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267526119220716_142_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,334 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = 
(result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (290 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 63 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2304, 1, 0, 2308, 1, 0, 2320, 1, 0, 2324, 1, 0, 2336, 1, 0, 2340, 1, 0, 3456, 1, 0, 3460, 1, 0, 3472, 1, 0, 
3476, 1, 0, 3488, 1, 0, 3492, 1, 0, 6912, 4, 0, 7360, 12, 0, 7360, 12, 0, 9728, 5, 0, 9728, 5, 0, 9344, 8, 0, 9088, 2, 0, 10368, 1, 0, 19008, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267526275366223_143_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267526275366223_143_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e61a3245 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267526275366223_143_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,317 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((172 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((189 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((217 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((227 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((236 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((241 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + 
} + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((252 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + 
Data: [576, 5, 0, 576, 5, 0, 7296, 1, 0, 7312, 1, 0, 16848, 2, 0, 16864, 2, 0, 16880, 2, 0, 17152, 4, 0, 17600, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267526476884735_145_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267526476884735_145_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..73a3cfdd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267526476884735_145_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,210 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((168 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2816, 13, 0, 2816, 13, 0, 2816, 13, 0, 2432, 2, 0, 3456, 5, 0, 3456, 5, 0, 6144, 2, 0, 6160, 2, 0, 8064, 6, 0, 8064, 6, 0, 11216, 4, 0, 11232, 4, 0, 12160, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267526550241551_146_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267526550241551_146_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1eae7f9c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267526550241551_146_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,148 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 3268, 8, 0, 3272, 8, 0, 3276, 8, 0, 3284, 8, 0, 3288, 8, 0, 3292, 8, 0, 3972, 8, 0, 3976, 8, 0, 3980, 8, 0, 3988, 8, 0, 3992, 8, 0, 3996, 8, 0, 4672, 8, 0, 4688, 8, 0, 5760, 4, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267526627674395_147_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267526627674395_147_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5d518e4a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267526627674395_147_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,246 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | 
(i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((115 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 51 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 1, 0, 1360, 1, 0, 2496, 1, 0, 2512, 1, 0, 3200, 1, 0, 3216, 1, 0, 3904, 1, 0, 3920, 1, 0, 9280, 6, 0, 9280, 6, 0, 9728, 14, 0, 9728, 14, 0, 9728, 14, 0, 12416, 1, 0, 12160, 2, 0, 11776, 4, 0, 11520, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - 
Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267526714656549_148_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267526714656549_148_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f0053aac --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267526714656549_148_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,143 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + 
} + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1152, 1, 0, 1472, 4, 0, 1920, 8, 0, 2560, 9, 0, 2560, 9, 0, 3456, 6, 0, 3456, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit 
+ Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267526764866725_149_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267526764866725_149_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..612ed4fd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267526764866725_149_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,315 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((33 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((108 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((237 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((256 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((266 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((273 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((282 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 1, 0, 1232, 1, 0, 2116, 1, 0, 2120, 1, 0, 2124, 1, 0, 2132, 1, 0, 2136, 1, 0, 2140, 1, 0, 5376, 4, 0, 15184, 2, 0, 15200, 2, 0, 15216, 2, 0, 18368, 4, 0] + - Name: _wave_op_index + 
Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267527002444118_151_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267527002444118_151_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d3229c70 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267527002444118_151_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,448 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + 
counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((181 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((190 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) 
|| (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((302 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((340 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((350 << 6) | (i4 << 4)) 
| (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((359 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((363 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((370 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((381 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 2320, 4, 0, 2336, 4, 0, 7488, 8, 0, 8384, 1, 0, 9536, 1, 0, 11604, 1, 0, 11620, 1, 0, 13056, 1, 0, 14592, 6, 0, 14592, 6, 
0, 15232, 4, 0, 16704, 9, 0, 16704, 9, 0, 19328, 2, 0, 19344, 2, 0, 19360, 2, 0, 22404, 4, 0, 22408, 4, 0, 22412, 4, 0, 22420, 4, 0, 22424, 4, 0, 22428, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267527266384262_153_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267527266384262_153_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f9bf336b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267527266384262_153_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,94 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1792, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267527316821528_154_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267527316821528_154_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4e80429a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267527316821528_154_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,224 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((80 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((89 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads 
+Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1152, 1, 0, 6976, 8, 0, 7616, 5, 0, 7616, 5, 0, 8832, 1, 0, 9728, 4, 0, 10176, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267527377378047_155_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267527377378047_155_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..591a1889 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267527377378047_155_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,290 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if 
(((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result 
+ WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((175 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((208 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((226 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((233 << 6) | (counter3 
<< 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((242 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((249 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2176, 8, 0, 3328, 12, 0, 3328, 12, 0, 5696, 4, 0, 7488, 4, 0, 8064, 5, 0, 8064, 5, 0, 8960, 1, 0, 9536, 1, 0, 11204, 2, 0, 11208, 2, 0, 11220, 2, 0, 11224, 2, 0, 11904, 5, 0, 11904, 5, 0, 11920, 5, 0, 11920, 5, 0, 13328, 1, 0, 13332, 1, 0, 13336, 1, 0, 13344, 1, 0, 13348, 1, 0, 13352, 1, 0, 13360, 1, 0, 13364, 1, 0, 13368, 1, 0, 14480, 9, 0, 14480, 9, 0, 14484, 9, 0, 14484, 9, 0, 14488, 9, 0, 14488, 9, 0, 14496, 9, 0, 14496, 9, 0, 14500, 9, 0, 14500, 9, 0, 14504, 9, 0, 14504, 9, 0, 14512, 9, 0, 14512, 9, 0, 14516, 
9, 0, 14516, 9, 0, 14520, 9, 0, 14520, 9, 0, 14928, 4, 0, 14932, 4, 0, 14936, 4, 0, 14944, 4, 0, 14948, 4, 0, 14952, 4, 0, 14960, 4, 0, 14964, 4, 0, 14968, 4, 0, 15952, 4, 0, 15956, 4, 0, 15960, 4, 0, 15968, 4, 0, 15972, 4, 0, 15976, 4, 0, 15984, 4, 0, 15988, 4, 0, 15992, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267528375138444_156_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267528375138444_156_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a83fcb9b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267528375138444_156_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267528423118235_157_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267528423118235_157_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..646ce0d4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267528423118235_157_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,273 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 
0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((217 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 5760, 1, 0, 9152, 2, 0, 9168, 2, 0, 11648, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267528487433498_158_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267528487433498_158_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ac9f1077 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267528487433498_158_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,329 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((133 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((140 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((149 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 
3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 135 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 4992, 4, 0, 5440, 8, 0, 6480, 10, 0, 6480, 10, 0, 6496, 10, 0, 6496, 10, 0, 7892, 14, 0, 7892, 14, 0, 7892, 14, 0, 7896, 14, 0, 7896, 14, 0, 7896, 14, 0, 7908, 14, 0, 7908, 14, 0, 7908, 14, 0, 7912, 14, 0, 7912, 14, 0, 7912, 14, 0, 8980, 3, 0, 8980, 3, 0, 8984, 3, 0, 8984, 3, 0, 8996, 3, 0, 8996, 3, 0, 9000, 3, 0, 9000, 3, 0, 9556, 4, 0, 9560, 4, 0, 9572, 4, 0, 9576, 4, 0, 10516, 5, 0, 10516, 5, 0, 10520, 5, 0, 10520, 5, 0, 10532, 5, 0, 10532, 5, 0, 10536, 5, 0, 10536, 5, 0, 11280, 10, 0, 11280, 10, 0, 11296, 10, 0, 11296, 10, 0, 14080, 8, 0, 14528, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267528668224400_159_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267528668224400_159_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..62a95533 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267528668224400_159_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,291 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Output log: every record is 3 consecutive uints (site tag, ballot.x, ballot.y). Element type is uint to match the stored values. */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the shared write cursor into _participant_bit; each record advances it by 3. */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Each guarded site below runs one wave intrinsic, atomically reserves a 3-word slot with InterlockedAdd, then stores its tag (site constant << 6, OR'ed with enclosing loop counters at << 4 and << 2) followed by the two low words of WaveActiveBallot(1). The control-flow shape is deliberately untouched: the expected records in pipeline.yaml are compared against what these exact branches log. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + 
result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((114 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = 
(result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((235 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5248, 1, 0, 6416, 9, 0, 6416, 9, 0, 6432, 9, 0, 6432, 9, 0, 8016, 9, 0, 8016, 9, 0, 8032, 9, 0, 8032, 9, 0, 8704, 9, 0, 8704, 9, 0, 13760, 4, 0, 15056, 9, 0, 15056, 9, 0, 15072, 9, 0, 15072, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267528772161380_160_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267528772161380_160_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fb7d64fd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267528772161380_160_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,380 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Output log: every record is 3 consecutive uints (site tag, ballot.x, ballot.y). Element type is uint to match the stored values. */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the shared write cursor into _participant_bit; each record advances it by 3. */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Each guarded site below runs one wave intrinsic, atomically reserves a 3-word slot with InterlockedAdd, then stores its tag (site constant << 6, OR'ed with enclosing loop counters at << 4 and << 2) followed by the two low words of WaveActiveBallot(1). The control-flow shape is deliberately untouched: the expected records in pipeline.yaml are compared against what these exact branches log. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((122 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = ((201 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((295 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((309 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((319 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((326 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((337 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((346 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((366 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i6 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((376 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + 
Fill: 0 + Size: 111 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 1984, 1, 0, 2000, 1, 0, 2016, 1, 0, 2688, 1, 0, 2704, 1, 0, 2720, 1, 0, 3136, 1, 0, 15488, 8, 0, 16128, 5, 0, 16128, 5, 0, 17664, 3, 0, 17664, 3, 0, 17680, 3, 0, 17680, 3, 0, 18880, 9, 0, 18880, 9, 0, 18884, 9, 0, 18884, 9, 0, 18896, 9, 0, 18896, 9, 0, 18900, 9, 0, 18900, 9, 0, 19776, 5, 0, 19776, 5, 0, 19780, 5, 0, 19780, 5, 0, 19792, 5, 0, 19792, 5, 0, 19796, 5, 0, 19796, 5, 0, 22144, 8, 0, 22148, 8, 0, 22160, 8, 0, 22164, 8, 0, 24064, 1, 0, 24080, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267529060121843_161_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267529060121843_161_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..aa07e747 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267529060121843_161_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,71 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Output log: every record is 3 consecutive uints (site tag, ballot.x, ballot.y). Element type is uint to match the stored values. */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the shared write cursor into _participant_bit; each record advances it by 3. */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* The single guarded site runs one wave intrinsic, atomically reserves a 3-word slot with InterlockedAdd, then stores its tag ((18 << 6) OR'ed with the loop index at << 4) followed by the two low words of WaveActiveBallot(1). The control-flow shape is deliberately untouched: the expected records in pipeline.yaml are compared against what these exact branches log. */ + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 10, 0, 1152, 10, 0, 1168, 10, 0, 1168, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 
+ Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267529114922800_162_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267529114922800_162_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e37f0044 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267529114922800_162_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,102 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Output log: every record is 3 consecutive uints (site tag, ballot.x, ballot.y). Element type is uint to match the stored values. */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the shared write cursor into _participant_bit; each record advances it by 3. */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Each guarded site below runs one wave intrinsic, atomically reserves a 3-word slot with InterlockedAdd, then stores its tag (site constant << 6, OR'ed with enclosing loop counters at << 4 and << 2) followed by the two low words of WaveActiveBallot(1). The control-flow shape is deliberately untouched: the expected records in pipeline.yaml are compared against what these exact branches log. */ + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
} else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1104, 9, 0, 1104, 9, 0, 1120, 9, 0, 1120, 9, 0, 2960, 4, 0, 2964, 4, 0, 2968, 4, 0, 2976, 4, 0, 2980, 4, 0, 2984, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267529193160919_163_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267529193160919_163_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d0327d05 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267529193160919_163_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,357 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (304 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3584, 1, 0, 3600, 1, 0, 3616, 1, 0, 4032, 1, 0, 4048, 1, 0, 4064, 1, 0, 7568, 2, 0, 7584, 2, 0, 9152, 4, 0, 9600, 8, 0, 10240, 9, 0, 10240, 9, 0, 13504, 4, 0, 14144, 9, 0, 14144, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267529280131185_164_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267529280131185_164_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7ebd4784 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267529280131185_164_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,259 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((115 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((122 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((169 << 6) | (i1 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((180 << 6) | (i1 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
+ break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3904, 5, 0, 3904, 5, 0, 3920, 5, 0, 3920, 5, 0, 6336, 1, 0, 6352, 1, 0, 7364, 8, 0, 7368, 8, 0, 7380, 8, 0, 7384, 8, 0, 7812, 8, 0, 7816, 8, 0, 7828, 8, 0, 7832, 8, 0, 8768, 1, 0, 8784, 1, 0, 9472, 4, 0, 9488, 4, 0, 12416, 4, 0, 12432, 4, 0, 13120, 2, 0, 13136, 2, 0, 
13824, 9, 0, 13824, 9, 0, 13840, 9, 0, 13840, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267529409065542_165_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267529409065542_165_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..91d3d1b4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267529409065542_165_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,119 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 3)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2496, 8, 0, 2512, 8, 0, 4800, 8, 0, 4816, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267529872763872_168_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267529872763872_168_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d52b8dbb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267529872763872_168_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,138 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: 
{ + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((80 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 1920, 5, 0, 1920, 5, 0, 3712, 1, 0, 5952, 8, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267529944715395_169_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267529944715395_169_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f5019669 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267529944715395_169_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,329 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (i1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((217 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((224 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 63 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1728, 2, 0, 2688, 2, 0, 2704, 2, 0, 2720, 2, 0, 3776, 2, 0, 4096, 4, 0, 6656, 8, 0, 7552, 8, 0, 9280, 1, 0, 9296, 1, 0, 9984, 1, 0, 10000, 1, 0, 14352, 2, 0, 14356, 2, 0, 14360, 2, 0, 14368, 2, 0, 14372, 2, 0, 14376, 2, 0, 15744, 4, 0, 16192, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267530067710882_170_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267530067710882_170_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4246b648 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267530067710882_170_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,422 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } 
+ } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((315 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((334 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((346 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((353 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((364 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((382 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (400 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((419 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() 
== 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((430 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (437 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (446 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (451 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1728, 10, 0, 1728, 10, 0, 2640, 8, 0, 2656, 8, 0, 5584, 2, 0, 5600, 2, 0, 6544, 2, 0, 6560, 2, 0, 7680, 2, 0, 8640, 1, 0, 8656, 1, 0, 11344, 1, 0, 11360, 1, 0, 13200, 1, 0, 13216, 1, 0, 13952, 1, 0, 14272, 4, 0, 14720, 12, 0, 14720, 12, 0, 17600, 1, 0, 20176, 2, 0, 20192, 2, 0, 28864, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + 
Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267530233266969_171_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267530233266969_171_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7bec2da7 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267530233266969_171_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,379 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + 
result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((126 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((139 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(((178 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 2)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + uint 
counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((302 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((309 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (313 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5248, 8, 0, 11412, 1, 0, 11416, 1, 0, 11428, 1, 0, 11432, 1, 0, 12736, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267530451441230_172_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267530451441230_172_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8f8204be --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267530451441230_172_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,189 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Generated wave-participation probe. tid is never read; every branch is keyed on WaveGetLaneIndex(). Each instrumented wave op reserves 3 words through InterlockedAdd on _wave_op_index[0] and stores: an id word (a constant shifted left by 6, OR'd with a loop counter shifted left by 4 where one is in scope), then ballot.x and ballot.y from WaveActiveBallot(1). NOTE(review): the expected data in pipeline.yaml presumes the 4 threads are lanes 0-3 of a single wave - confirm on the target. */ + uint result = 0; /* only accumulates wave-op return values; never stored to a buffer in this shader */ + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); /* 9 << 6 == 576, an id word listed in expected_bit_patterns */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); /* id word also carries the already-incremented loop counter, shifted left by 4 */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() 
== 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); /* 107 << 6 == 6848, an id word listed in expected_bit_patterns */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if 
(((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter2 == 2)) { + break; /* leaves the loop after the second pass even though the loop bound is 3 */ + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); /* 160 << 6 == 10240, an id word listed in expected_bit_patterns */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 6848, 2, 0, 10240, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267530515119253_173_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267530515119253_173_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1bd65cf6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267530515119253_173_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,193 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 1)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if 
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 0)) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(3)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 1296, 1, 0, 1728, 1, 0, 5696, 9, 0, 5696, 9, 0, 8336, 4, 0, 8352, 4, 0, 8368, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267530852603152_175_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267530852603152_175_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7ac3687a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267530852603152_175_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,256 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((i0 == 2)) { + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((124 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = ((139 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 2)) { + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 1, 0, 912, 1, 0, 928, 1, 0, 2368, 5, 0, 2368, 5, 0, 2372, 5, 0, 2372, 5, 0, 2376, 5, 0, 2376, 5, 0, 2384, 5, 0, 2384, 5, 0, 2388, 5, 0, 2388, 5, 0, 2392, 5, 0, 2392, 5, 0, 2400, 5, 0, 2400, 5, 0, 2404, 5, 0, 2404, 5, 0, 2408, 5, 0, 2408, 5, 0, 3584, 1, 0, 10624, 8, 0, 12800, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267531103673265_177_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267531103673265_177_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8ca34725 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267531103673265_177_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,170 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 
2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 9, 0, 1600, 9, 0, 2240, 8, 0, 9216, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267531149709271_178_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267531149709271_178_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d6ff1a2b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267531149709271_178_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,251 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if 
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 4096, 2, 0, 4112, 2, 0, 4128, 2, 0, 8320, 4, 0, 11712, 8, 0, 11728, 8, 0, 11744, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267531213135693_179_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267531213135693_179_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..66394e58 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267531213135693_179_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,341 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((162 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if ((counter2 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { 
+ if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((281 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter6 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((295 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: 
main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 87 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 5440, 4, 0, 5888, 8, 0, 6784, 9, 0, 6784, 9, 0, 10388, 10, 0, 10388, 10, 0, 10392, 10, 0, 10392, 10, 0, 10396, 10, 0, 10396, 10, 0, 10404, 10, 0, 10404, 10, 0, 10408, 10, 0, 10408, 10, 0, 10412, 10, 0, 10412, 10, 0, 11520, 2, 0, 13136, 5, 0, 13136, 5, 0, 13152, 5, 0, 13152, 5, 0, 14224, 4, 0, 14240, 4, 0, 15616, 2, 0, 16832, 2, 0, 16848, 2, 0, 18880, 2, 0, 18896, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267531446180682_180_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267531446180682_180_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dce83967 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267531446180682_180_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,212 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 3)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 
+ Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 7168, 4, 0, 8512, 8, 0, 9792, 8, 0, 10496, 8, 0, 10944, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267531647259208_182_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267531647259208_182_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6fe1cebc --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267531647259208_182_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,303 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((84 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if 
(((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((203 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((219 << 6) | (i3 << 4)) | (counter4 << 2)) | counter5); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((228 << 6) | (i3 << 4)) | (counter4 << 2)) | counter5); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((235 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((242 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((262 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((276 << 6) | (i3 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((285 << 6) | (i3 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((292 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 
+ Size: 111 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2816, 8, 0, 8256, 10, 0, 8256, 10, 0, 8272, 10, 0, 8272, 10, 0, 8288, 10, 0, 8288, 10, 0, 9984, 8, 0, 11392, 4, 0, 14021, 1, 0, 14022, 1, 0, 14023, 1, 0, 14025, 1, 0, 14026, 1, 0, 14027, 1, 0, 14029, 1, 0, 14030, 1, 0, 14031, 1, 0, 14037, 1, 0, 14038, 1, 0, 14039, 1, 0, 14041, 1, 0, 14042, 1, 0, 14043, 1, 0, 14045, 1, 0, 14046, 1, 0, 14047, 1, 0, 16768, 8, 0, 16784, 8, 0, 18244, 8, 0, 18248, 8, 0, 18260, 8, 0, 18264, 8, 0, 18688, 2, 0, 18704, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267532013154258_183_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267532013154258_183_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4e9e1935 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267532013154258_183_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,131 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || 
(WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 2368, 4, 0, 2384, 4, 0, 3780, 4, 0, 3784, 4, 0, 3796, 4, 0, 3800, 4, 0, 4740, 4, 0, 4744, 4, 0, 4756, 4, 0, 4760, 4, 0, 5632, 4, 0, 5648, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + 
- Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267532178707023_185_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267532178707023_185_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..50e2af6c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267532178707023_185_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,371 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + uint 
counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((128 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || 
(WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (294 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads 
+Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2576, 2, 0, 2592, 2, 0, 5056, 1, 0, 8196, 4, 0, 8200, 4, 0, 8204, 4, 0, 8212, 4, 0, 8216, 4, 0, 8220, 4, 0, 8228, 4, 0, 8232, 4, 0, 8236, 4, 0, 10192, 8, 0, 10208, 8, 0, 10224, 8, 0, 13568, 9, 0, 13568, 9, 0, 18816, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267532341126432_186_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267532341126432_186_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5aba02f9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267532341126432_186_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,130 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 4, 0, 6272, 13, 0, 6272, 13, 0, 6272, 13, 0, 5760, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267532392232926_187_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267532392232926_187_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..19005a4c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267532392232926_187_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,151 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: 
UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 4, 0, 2752, 8, 0, 6080, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267532536017520_189_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267532536017520_189_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f9f0a075 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267532536017520_189_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,230 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if 
((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = 
(result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((157 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(4)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((201 << 6) | (i1 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i1 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2368, 1, 0, 1984, 4, 0, 1344, 8, 0, 3200, 5, 0, 3200, 5, 0, 5952, 4, 0, 5968, 4, 0, 5984, 4, 0, 6400, 1, 0, 6416, 1, 0, 6432, 1, 0, 7424, 5, 0, 7424, 5, 0, 7440, 5, 0, 7440, 5, 0, 7456, 5, 0, 7456, 5, 0, 12864, 2, 0, 12868, 2, 0, 12872, 2, 0, 12880, 2, 0, 12884, 2, 0, 12888, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267532751127018_190_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267532751127018_190_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e07f699d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267532751127018_190_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,122 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 4224, 8, 0, 3840, 1, 0, 3456, 6, 0, 3456, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267532798130883_191_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267532798130883_191_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7631a8ec --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267532798130883_191_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,203 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1616, 5, 0, 1616, 5, 0, 2256, 1, 0, 3152, 4, 0, 6720, 9, 0, 6720, 9, 0, 6736, 9, 0, 6736, 9, 0, 8512, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267532863604013_192_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267532863604013_192_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..97211bc6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267532863604013_192_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,199 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 9, 0, 768, 9, 0, 6784, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267532910934289_193_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267532910934289_193_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b886de25 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267532910934289_193_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,206 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) 
|| (WaveGetLaneIndex() == 3))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 7744, 1, 0, 10048, 4, 0, 10624, 5, 0, 10624, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267532964620943_194_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267532964620943_194_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d526dde9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267532964620943_194_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,202 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((136 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5312, 5, 0, 5312, 5, 0, 7040, 1, 0, 9600, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern 
+ GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267533035843679_195_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267533035843679_195_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c758df23 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267533035843679_195_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,181 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if 
((WaveGetLaneIndex() < 2)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 1, 0, 2304, 4, 0, 3904, 1, 0, 3920, 1, 0, 3936, 1, 0, 5248, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267533093841759_196_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267533093841759_196_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4498249e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267533093841759_196_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,311 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if 
((counter0 == 1)) { + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((184 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((230 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((244 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((251 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 4608, 4, 0, 7808, 4, 0, 7824, 4, 0, 8960, 1, 0, 8976, 1, 0, 12672, 2, 0, 12688, 2, 0, 12704, 2, 0, 15616, 8, 0, 15632, 8, 0, 15648, 8, 0, 16064, 8, 0, 16080, 8, 0, 16096, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267533286067488_198_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267533286067488_198_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..18045d95
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267533286067488_198_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,245 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Output log: every lane that runs a wave-op site appends one 3-uint record (tag, ballot.x, ballot.y).
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the next free index into _participant_bit; each record advances it atomically by 3.
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // tid is never read; every branch keys off WaveGetLaneIndex().
+  uint result = 0; // Only feeds the wave ops below; never stored to a buffer.
+  switch ((WaveGetLaneIndex() % 2)) {
+  case 0: {
+    if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+      switch ((WaveGetLaneIndex() % 3)) {
+      case 0: {
+        if ((WaveGetLaneIndex() < 8)) {
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve a 3-uint record; temp receives its start index.
+          _participant_bit[temp] = (19 << 6); // Record tag: the site id shifted left by 6.
+          uint4 ballot = WaveActiveBallot(1); // Bit mask of the lanes active at this site.
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 1: {
+        if (((WaveGetLaneIndex() % 2) == 0)) {
+          result = (result + WaveActiveSum(2));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (28 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 2: {
+        if (((WaveGetLaneIndex() & 1) == 0)) {
+          if (((WaveGetLaneIndex() & 1) == 1)) {
+            result = (result + WaveActiveMax(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (42 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if (((WaveGetLaneIndex() & 1) == 0)) {
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (51 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        break;
+      }
+      }
+      if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+        result = (result + WaveActiveMin(WaveGetLaneIndex()));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (62 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    } else {
+      if ((WaveGetLaneIndex() == 2)) {
+        result = (result + WaveActiveMax(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (69 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      uint counter0 = 0;
+      while ((counter0 < 2)) {
+        counter0 = (counter0 + 1);
+        if ((WaveGetLaneIndex() < 1)) {
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); // Inside a loop the tag also carries the iteration counter, shifted left by 4.
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() & 1) == 1)) {
+          if (((WaveGetLaneIndex() & 1) == 1)) {
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((97 << 6) | (counter0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        } else {
+          if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+            result = (result + WaveActiveMax(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((108 << 6) | (counter0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((WaveGetLaneIndex() < 1)) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((115 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if ((WaveGetLaneIndex() == 2)) {
+        result = (result + WaveActiveMin(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (122 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+  }
+  case 1: { // case 0 above has no break, so its lanes fall through into this case.
+    if (((WaveGetLaneIndex() % 2) == 0)) {
+      result = (result + WaveActiveSum(2));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (131 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  }
+  switch ((WaveGetLaneIndex() % 2)) {
+  case 0: {
+    if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+      if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (152 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      uint counter1 = 0;
+      while ((counter1 < 2)) {
+        counter1 = (counter1 + 1);
+        if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+          result = (result + WaveActiveSum((WaveGetLaneIndex() + 3)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((172 << 6) | (counter1 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (183 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  case 1: {
+    if (((WaveGetLaneIndex() % 2) == 0)) {
+      result = (result + WaveActiveSum(2));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (192 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 36
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1216, 1, 0, 3968, 1, 0, 4416, 4, 0, 6928, 4, 0, 6944, 4, 0, 7808, 4, 0, 8384, 5, 0, 8384, 5, 0, 9728, 1, 0, 11024, 1, 0, 11040, 1, 0, 11712, 1, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267533360019785_199_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267533360019785_199_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..03fb5d8f
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267533360019785_199_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,106 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Output log: every lane that runs a wave-op site appends one 3-uint record (tag, ballot.x, ballot.y).
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the next free index into _participant_bit; each record advances it atomically by 3.
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // tid is never read; every branch keys off WaveGetLaneIndex().
+  uint result = 0; // Only feeds the wave ops below; never stored to a buffer.
+  if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) {
+    result = (result + WaveActiveSum((WaveGetLaneIndex() + 1)));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve a 3-uint record; temp receives its start index.
+    _participant_bit[temp] = (64 << 6); // Record tag: the site id shifted left by 6.
+    uint4 ballot = WaveActiveBallot(1); // Bit mask of the lanes active at this site.
+    _participant_bit[(temp + 1)] = ballot.x;
+    _participant_bit[(temp + 2)] = ballot.y;
+  } else {
+    if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) {
+      result = (result + WaveActiveMin((WaveGetLaneIndex() + 2)));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (58 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    } else {
+      if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) {
+        result = (result + WaveActiveMax((WaveGetLaneIndex() + 3)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (52 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      } else {
+        if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+          result = (result + WaveActiveSum(4));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (46 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        } else {
+          if (((WaveGetLaneIndex() & 1) == 1)) {
+            result = (result + WaveActiveMin((WaveGetLaneIndex() + 5)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (42 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+      }
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 12
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [4096, 13, 0, 4096, 13, 0, 4096, 13, 0, 2688, 2, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267533405668245_200_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267533405668245_200_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..ef0f270c
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267533405668245_200_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,159 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Output log: every lane that runs a wave-op site appends one 3-uint record (tag, ballot.x, ballot.y).
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the next free index into _participant_bit; each record advances it atomically by 3.
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // tid is never read; every branch keys off WaveGetLaneIndex().
+  uint result = 0; // Only feeds the wave ops below; never stored to a buffer.
+  if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) {
+    result = (result + WaveActiveSum((WaveGetLaneIndex() + 1)));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve a 3-uint record; temp receives its start index.
+    _participant_bit[temp] = (29 << 6); // Record tag: the site id shifted left by 6.
+    uint4 ballot = WaveActiveBallot(1); // Bit mask of the lanes active at this site.
+    _participant_bit[(temp + 1)] = ballot.x;
+    _participant_bit[(temp + 2)] = ballot.y;
+  } else {
+    if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) {
+      result = (result + WaveActiveMin((WaveGetLaneIndex() + 2)));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (23 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+  uint counter0 = 0;
+  while ((counter0 < 3)) {
+    counter0 = (counter0 + 1);
+    if ((WaveGetLaneIndex() < 1)) {
+      for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) {
+        if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((58 << 6) | (counter0 << 4)) | (i1 << 2)); // Nested loops: outer counter shifted left by 4, inner index by 2.
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if ((WaveGetLaneIndex() < 1)) {
+        result = (result + WaveActiveMin((WaveGetLaneIndex() + 1)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); // Single loop: the tag carries the iteration counter shifted left by 4.
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    } else {
+      if (((WaveGetLaneIndex() & 1) == 0)) {
+        result = (result + WaveActiveSum(WaveGetLaneIndex()));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((76 << 6) | (counter0 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      switch ((WaveGetLaneIndex() % 3)) {
+      case 0: {
+        if ((WaveGetLaneIndex() < 8)) {
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((86 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 1: {
+        if (((WaveGetLaneIndex() % 2) == 0)) {
+          result = (result + WaveActiveSum(2));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((95 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 2: {
+        if (true) {
+          result = (result + WaveActiveSum(3));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((100 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      }
+      if (((WaveGetLaneIndex() & 1) == 1)) {
+        result = (result + WaveActiveSum((WaveGetLaneIndex() + 2)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((111 << 6) | (counter0 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 90
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1856, 7, 0, 1856, 7, 0, 1856, 7, 0, 3728, 1, 0, 3732, 1, 0, 3736, 1, 0, 3744, 1, 0, 3748, 1, 0, 3752, 1, 0, 3760, 1, 0, 3764, 1, 0, 3768, 1, 0, 4304, 1, 0, 4320, 1, 0, 4336, 1, 0, 4880, 4, 0, 4896, 4, 0, 4912, 4, 0, 5520, 8, 0, 5536, 8, 0, 5552, 8, 0, 6416, 4, 0, 6432, 4, 0, 6448, 4, 0, 7120, 10, 0, 7120, 10, 0, 7136, 10, 0, 7136, 10, 0, 7152, 10, 0, 7152, 10, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267533514881379_201_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267533514881379_201_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..48d50ebe
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267533514881379_201_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,354 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Output log: every lane that runs a wave-op site appends one 3-uint record (tag, ballot.x, ballot.y).
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the next free index into _participant_bit; each record advances it atomically by 3.
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // tid is never read; every branch keys off WaveGetLaneIndex().
+  uint result = 0; // Only feeds the wave ops below; never stored to a buffer.
+  switch ((WaveGetLaneIndex() % 4)) {
+  case 0: {
+    if ((WaveGetLaneIndex() == 2)) {
+      uint counter0 = 0;
+      while ((counter0 < 2)) {
+        counter0 = (counter0 + 1);
+        if ((WaveGetLaneIndex() == 3)) {
+          if ((WaveGetLaneIndex() == 2)) {
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve a 3-uint record; temp receives its start index.
+            _participant_bit[temp] = ((22 << 6) | (counter0 << 4)); // Record tag: site id shifted left by 6, loop counter shifted left by 4.
+            uint4 ballot = WaveActiveBallot(1); // Bit mask of the lanes active at this site.
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((WaveGetLaneIndex() == 0)) {
+            result = (result + WaveActiveMax(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((29 << 6) | (counter0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if (((WaveGetLaneIndex() & 1) == 0)) {
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((38 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if ((WaveGetLaneIndex() == 3)) {
+        result = (result + WaveActiveMin(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (45 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    } else {
+      for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) {
+        if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) {
+          result = (result + WaveActiveMin(6));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((64 << 6) | (i1 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        uint counter2 = 0;
+        while ((counter2 < 2)) {
+          counter2 = (counter2 + 1);
+          if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) {
+            result = (result + WaveActiveMin((WaveGetLaneIndex() + 4)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((88 << 6) | (i1 << 4)) | (counter2 << 2)); // Nested loops: outer index shifted left by 4, inner counter by 2.
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((counter2 == 1)) {
+            break;
+          }
+        }
+        if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
+          result = (result + WaveActiveMax(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((102 << 6) | (i1 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) {
+        result = (result + WaveActiveMin(WaveGetLaneIndex()));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (117 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+  }
+  case 1: { // case 0 above has no break, so its lanes fall through into this case.
+    if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+      if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (135 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (146 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    } else {
+      if ((WaveGetLaneIndex() >= 2)) {
+        result = (result + WaveActiveMin((WaveGetLaneIndex() + 3)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (155 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      switch ((WaveGetLaneIndex() % 4)) {
+      case 0: {
+        if ((WaveGetLaneIndex() < 8)) {
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (165 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 1: {
+        if (((WaveGetLaneIndex() % 2) == 0)) {
+          result = (result + WaveActiveSum(2));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (174 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 2: {
+        if (true) {
+          result = (result + WaveActiveSum(3));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (179 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 3: {
+        if ((WaveGetLaneIndex() < 20)) {
+          result = (result + WaveActiveSum(4));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (186 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      default: {
+        result = (result + WaveActiveSum(99));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (190 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+        break;
+      }
+      }
+      if ((WaveGetLaneIndex() < 1)) {
+        result = (result + WaveActiveMax(7));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (197 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  case 2: {
+    if (true) {
+      result = (result + WaveActiveSum(3));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (202 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 3: {
+    switch ((WaveGetLaneIndex() % 4)) {
+    case 0: {
+      uint counter3 = 0;
+      while ((counter3 < 3)) {
+        counter3 = (counter3 + 1);
+        if (((WaveGetLaneIndex() & 1) == 0)) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((221 << 6) | (counter3 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        uint counter4 = 0;
+        while ((counter4 < 3)) {
+          counter4 = (counter4 + 1);
+          if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) {
+            result = (result + WaveActiveMin((WaveGetLaneIndex() + 3)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((245 << 6) | (counter3 << 4)) | (counter4 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) {
+            result = (result + WaveActiveSum(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((260 << 6) | (counter3 << 4)) | (counter4 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (269 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 2: {
+      if (true) {
+        result = (result + WaveActiveSum(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (274 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 3: {
+      if ((WaveGetLaneIndex() < 20)) {
+        result = (result + WaveActiveSum(4));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (281 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    }
+    break;
+  }
+  default: {
+    result = (result + WaveActiveSum(99));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp);
+    _participant_bit[temp] = (285 << 6);
+    uint4 ballot = WaveActiveBallot(1);
+    _participant_bit[(temp + 1)] = ballot.x;
+    _participant_bit[(temp + 2)] = ballot.y;
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 24
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [4096, 1, 0, 4112, 1, 0, 5636, 1, 0, 5652, 1, 0, 8640, 1, 0, 9344, 1, 0, 12928, 4, 0, 17984, 8, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267533665325772_203_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267533665325772_203_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..ad415e5d
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267533665325772_203_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,86 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Output log: every lane that runs a wave-op site appends one 3-uint record (tag, ballot.x, ballot.y).
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the next free index into _participant_bit; each record advances it atomically by 3.
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // tid is never read; every branch keys off WaveGetLaneIndex().
+  uint result = 0; // Only feeds the wave ops below; never stored to a buffer.
+  if ((WaveGetLaneIndex() >= 2)) {
+    result = (result + WaveActiveSum(1));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve a 3-uint record; temp receives its start index.
+    _participant_bit[temp] = (28 << 6); // Record tag: the site id shifted left by 6.
+    uint4 ballot = WaveActiveBallot(1); // Bit mask of the lanes active at this site.
+    _participant_bit[(temp + 1)] = ballot.x;
+    _participant_bit[(temp + 2)] = ballot.y;
+  } else {
+    if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
+      result = (result + WaveActiveMin((WaveGetLaneIndex() + 2)));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (24 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    } else {
+      if ((WaveGetLaneIndex() >= 3)) {
+        result = (result + WaveActiveMax((WaveGetLaneIndex() + 3)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (18 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 9
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1792, 12, 0, 1792, 12, 0, 1536, 2, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267533709289310_204_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267533709289310_204_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..52347713 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267533709289310_204_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,474 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) 
|| (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
if ((counter2 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (292 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((308 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(9)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((337 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((350 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((365 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((383 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((394 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter6 == 2)) { + break; + } + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (407 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (419 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (426 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (437 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (450 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 5952, 4, 0, 10624, 5, 0, 10624, 5, 0, 12480, 1, 0, 16640, 4, 0, 18688, 8, 0, 19728, 8, 0, 19744, 8, 0, 20416, 8, 0, 23360, 2, 0, 23376, 2, 0, 23392, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267533808719614_205_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267533808719614_205_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..4144c140
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267533808719614_205_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,132 @@
+#--- source.hlsl
+// Every lane that executes a wave op appends one 3-word record to _participant_bit:
+//   word 0 = (op id << 6) | enclosing-loop counter bits, word 1 = ballot.x, word 2 = ballot.y.
+// _wave_op_index[0] is the shared write cursor; InterlockedAdd reserves 3 words per record.
+RWStructuredBuffer<uint> _participant_bit : register(u0);
+RWStructuredBuffer<uint> _wave_op_index : register(u1);
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) {
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (9 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: {
+      if ((WaveGetLaneIndex() < 2)) {
+        if ((WaveGetLaneIndex() >= 3)) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (19 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        uint counter0 = 0;
+        while ((counter0 < 2)) {
+          counter0 = (counter0 + 1);
+          if ((WaveGetLaneIndex() >= 2)) {
+            if ((WaveGetLaneIndex() < 1)) {
+              result = (result + WaveActiveSum(WaveGetLaneIndex()));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = ((36 << 6) | (counter0 << 4));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+            result = (result + WaveActiveSum(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((47 << 6) | (counter0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+      } else {
+        if ((WaveGetLaneIndex() < 2)) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (54 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((WaveGetLaneIndex() >= 3)) {
+          result = (result + WaveActiveMin((WaveGetLaneIndex() + 3)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (63 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 9
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    # Order-independent records: op 9 by lanes {0, 2} (mask 5), one copy per lane; op 63 by lane 3 only (mask 8).
+    Data: [576, 5, 0, 576, 5, 0, 4032, 8, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267533986112658_207_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267533986112658_207_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..28eee381
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267533986112658_207_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,146 @@
+#--- source.hlsl
+// Every lane that executes a wave op appends one 3-word record to _participant_bit:
+//   word 0 = (op id << 6) | enclosing-loop counter bits, word 1 = ballot.x, word 2 = ballot.y.
+// _wave_op_index[0] is the shared write cursor; InterlockedAdd reserves 3 words per record.
+RWStructuredBuffer<uint> _participant_bit : register(u0);
+RWStructuredBuffer<uint> _wave_op_index : register(u1);
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) {
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      switch ((WaveGetLaneIndex() % 3)) {
+        case 0: {
+          uint counter0 = 0;
+          while ((counter0 < 3)) {
+            counter0 = (counter0 + 1);
+            if ((WaveGetLaneIndex() < 2)) {
+              result = (result + WaveActiveMin((WaveGetLaneIndex() + 1)));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = ((21 << 6) | (counter0 << 4));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          break;
+        }
+        case 1: {
+          if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) {
+            if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) {
+              result = (result + WaveActiveSum(6));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (39 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
+              result = (result + WaveActiveMin(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (50 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          break;
+        }
+        case 2: {
+          if ((WaveGetLaneIndex() == 2)) {
+            if ((WaveGetLaneIndex() == 0)) {
+              result = (result + WaveActiveMin(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (60 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          break;
+        }
+      }
+    }
+    // No break above: case 0 falls through into case 1; the expected op-89 mask (lanes 2 and 3) relies on it.
+    case 1: {
+      for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) {
+        if (((WaveGetLaneIndex() & 1) == 1)) {
+          result = (result + WaveActiveMax((WaveGetLaneIndex() + 4)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((79 << 6) | (i1 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((WaveGetLaneIndex() >= 2)) {
+          if ((WaveGetLaneIndex() >= 2)) {
+            result = (result + WaveActiveMax(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((89 << 6) | (i1 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+      }
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 45
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    # Order-independent records: op 21 by lane 0 for counter0 = 1..3; ops 79 (lanes {1, 3}) and 89 (lanes {2, 3}) for i1 = 0..2, one copy per lane.
+    Data: [1360, 1, 0, 1376, 1, 0, 1392, 1, 0, 5056, 10, 0, 5056, 10, 0, 5072, 10, 0, 5072, 10, 0, 5088, 10, 0, 5088, 10, 0, 5696, 12, 0, 5696, 12, 0, 5712, 12, 0, 5712, 12, 0, 5728, 12, 0, 5728, 12, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267534055977159_208_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267534055977159_208_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..cd4a0a51
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267534055977159_208_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,164 @@
+#--- source.hlsl
+// Every lane that executes a wave op appends one 3-word record to _participant_bit:
+//   word 0 = (op id << 6) | enclosing-loop counter bits, word 1 = ballot.x, word 2 = ballot.y.
+// _wave_op_index[0] is the shared write cursor; InterlockedAdd reserves 3 words per record.
+RWStructuredBuffer<uint> _participant_bit : register(u0);
+RWStructuredBuffer<uint> _wave_op_index : register(u1);
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) {
+  uint result = 0;
+  if ((WaveGetLaneIndex() == 1)) {
+    if ((WaveGetLaneIndex() == 2)) {
+      result = (result + WaveActiveMax(WaveGetLaneIndex()));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (9 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) {
+      if ((WaveGetLaneIndex() >= 3)) {
+        result = (result + WaveActiveMin(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((24 << 6) | (i0 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      uint counter1 = 0;
+      while ((counter1 < 3)) {
+        counter1 = (counter1 + 1);
+        switch ((WaveGetLaneIndex() % 4)) {
+          case 0: {
+            if ((WaveGetLaneIndex() < 8)) {
+              result = (result + WaveActiveSum(1));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (((41 << 6) | (i0 << 4)) | (counter1 << 2));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+          case 1: {
+            if (((WaveGetLaneIndex() % 2) == 0)) {
+              result = (result + WaveActiveSum(2));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (counter1 << 2));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+          case 2: {
+            if (true) {
+              result = (result + WaveActiveSum(3));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (((55 << 6) | (i0 << 4)) | (counter1 << 2));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          // No break above: case 2 falls through into case 3.
+          case 3: {
+            if ((WaveGetLaneIndex() < 20)) {
+              result = (result + WaveActiveSum(4));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (((62 << 6) | (i0 << 4)) | (counter1 << 2));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+        }
+        if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) {
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((73 << 6) | (i0 << 4)) | (counter1 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if ((WaveGetLaneIndex() < 2)) {
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((80 << 6) | (i0 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    if ((WaveGetLaneIndex() == 1)) {
+      result = (result + WaveActiveMax(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (87 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 12
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    # Order-independent records: only lane 1 (mask 2) is active: op 80 for i0 = 0..2, then op 87.
+    Data: [5120, 2, 0, 5136, 2, 0, 5152, 2, 0, 5568, 2, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267534120850388_209_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267534120850388_209_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cb9c29d8 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267534120850388_209_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,669 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(7)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((116 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((134 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((145 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + for (uint 
i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((199 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((210 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((244 << 6) | (counter3 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((285 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((295 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = ((313 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((323 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((332 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((336 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((343 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((354 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { 
+ if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (364 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (373 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (383 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((398 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((416 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((427 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((436 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((443 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (450 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (457 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((471 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i9 = 0; (i9 < 2); i9 = (i9 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 
3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((490 << 6) | (counter8 << 4)) | (i9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((503 << 6) | (counter8 << 4)) | (i9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((510 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (517 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i10 = 0; (i10 < 3); i10 = (i10 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((545 << 6) | (i10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i11 = 0; (i11 < 2); i11 = (i11 + 1)) { + if ((((WaveGetLaneIndex() == 0) 
|| (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((568 << 6) | (i10 << 4)) | (i11 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((583 << 6) | (i10 << 4)) | (i11 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((598 << 6) | (i10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i10 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (621 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (631 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (640 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (644 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (655 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (659 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4672, 1, 0, 4688, 1, 0, 5376, 1, 0, 5392, 1, 0, 7444, 9, 0, 7444, 9, 0, 7448, 9, 0, 7448, 9, 0, 7452, 9, 0, 7452, 9, 0, 8596, 9, 0, 8596, 9, 0, 8600, 9, 0, 8600, 9, 0, 8604, 9, 0, 8604, 9, 0, 9300, 9, 0, 9300, 9, 0, 9304, 9, 0, 9304, 9, 0, 9308, 9, 0, 9308, 9, 0, 10000, 9, 0, 10000, 9, 0, 18256, 13, 0, 18256, 13, 0, 18256, 13, 0, 18272, 13, 0, 18272, 13, 0, 18272, 13, 0, 18288, 13, 0, 18288, 
13, 0, 18288, 13, 0, 18896, 2, 0, 18912, 2, 0, 18928, 2, 0, 22672, 9, 0, 22672, 9, 0, 22688, 9, 0, 22688, 9, 0, 22704, 9, 0, 22704, 9, 0, 23296, 1, 0, 29248, 4, 0, 32208, 4, 0, 32212, 4, 0, 32224, 4, 0, 32228, 4, 0, 39744, 8, 0, 41920, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267535091465601_210_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267535091465601_210_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7e105d7b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267535091465601_210_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,155 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Log of 3-uint records written by main: tag, ballot.x, ballot.y. Element type uint matches the stores below. */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the running write offset into _participant_bit. */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Every guarded wave op is followed by the same epilogue: InterlockedAdd on _wave_op_index[0] reserves three slots, then the tag (constant << 6) and ballot.x / ballot.y of WaveActiveBallot(1) are stored there. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3)))
{ + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); +
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 5, 0, 1024, 5, 0, 2816, 1, 0, 4608, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267535152389128_211_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267535152389128_211_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..17c044b7 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267535152389128_211_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,221 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Log of 3-uint records written by main: tag, ballot.x, ballot.y. Element type uint matches the stores below. */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the running write offset into _participant_bit. */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Every guarded wave op is followed by the same epilogue: InterlockedAdd on _wave_op_index[0] reserves three slots, then the tag (constant << 6, OR'd with the enclosing loop counters shifted by 4 and 2) and ballot.x / ballot.y of WaveActiveBallot(1) are stored there. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 4)) { /* Cases 0, 1 and 2 below end without a break; expected_bit_patterns holds tags 36/45/50 with mask 1 and tag 57 with mask 9, which matches lane 0 falling through to case 3. */ + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + }
+ } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { +
result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((117 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((128 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 +
- Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 8, 0, 1232, 8, 0, 2308, 1, 0, 2312, 1, 0, 2316, 1, 0, 2324, 1, 0, 2328, 1, 0, 2332, 1, 0, 2884, 1, 0, 2888, 1, 0, 2892, 1, 0, 2900, 1, 0, 2904, 1, 0, 2908, 1, 0, 3204, 1, 0, 3208, 1, 0, 3212, 1, 0, 3220, 1, 0, 3224, 1, 0, 3228, 1, 0, 3652, 9, 0, 3652, 9, 0, 3656, 9, 0, 3656, 9, 0, 3660, 9, 0, 3660, 9, 0, 3668, 9, 0, 3668, 9, 0, 3672, 9, 0, 3672, 9, 0, 3676, 9, 0, 3676, 9, 0, 4224, 8, 0, 4240, 8, 0, 9088, 2, 0, 9104, 2, 0, 9120, 2, 0, 9792, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267535487841861_213_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267535487841861_213_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..abc8e141 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267535487841861_213_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Log of 3-uint records written by main: tag, ballot.x, ballot.y. Element type uint matches the stores below. */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the running write offset into _participant_bit. */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Each case performs one guarded wave op, then InterlockedAdd on _wave_op_index[0] reserves three slots where the tag (constant << 6) and ballot.x / ballot.y of WaveActiveBallot(1) are stored. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name:
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267535578865238_215_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267535578865238_215_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ee976ed6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267535578865238_215_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,508 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((172 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((223 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((234 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((273 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((291 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((301 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((308 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((317 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((334 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + break; + } + case 2: { + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((360 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((378 << 6) | (i7 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((388 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((397 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((408 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((418 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((427 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((432 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((439 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((443 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((450 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (457 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (461 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 
threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 4, 0, 2256, 5, 0, 2256, 5, 0, 2272, 5, 0, 2272, 5, 0, 3156, 4, 0, 3172, 4, 0, 4032, 1, 0, 5696, 9, 0, 5696, 9, 0, 8384, 4, 0, 11728, 1, 0, 11744, 1, 0, 11760, 1, 0, 17472, 2, 0, 17488, 2, 0, 17504, 2, 0, 19268, 2, 0, 19272, 2, 0, 19284, 2, 0, 19288, 2, 0, 19300, 2, 0, 19304, 2, 0, 20292, 2, 0, 20296, 2, 0, 20308, 2, 0, 20312, 2, 0, 20324, 2, 0, 20328, 2, 0, 21376, 2, 0, 21392, 2, 0, 21408, 2, 0, 23040, 4, 0, 23056, 4, 0, 23072, 4, 0, 24832, 4, 0, 24848, 4, 0, 24864, 4, 0, 26112, 4, 0, 26128, 4, 0, 26144, 4, 0, 29248, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267535797988774_216_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267535797988774_216_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..117b9d52 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267535797988774_216_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,142 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 2)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((27 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + 
} + } + if ((counter1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1732, 1, 0, 1748, 1, 0, 1764, 1, 0, 3456, 1, 0, 3472, 1, 0, 3488, 1, 0, 4288, 5, 0, 4288, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 
+ VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267535888932030_217_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267535888932030_217_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ca6c3a25 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267535888932030_217_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,263 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 2)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 2048, 4, 0, 7552, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267535941511543_218_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267535941511543_218_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..96eeb9a2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267535941511543_218_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,178 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((123 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 4, 0, 2752, 4, 0, 4096, 2, 0, 5248, 2, 0, 6720, 6, 0, 6720, 6, 0, 6736, 6, 0, 6736, 6, 0, 6752, 6, 0, 6752, 6, 0, 8320, 4, 0, 8336, 4, 0, 8352, 4, 0, 9408, 2, 0, 9424, 2, 0, 9440, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267536005990148_219_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267536005990148_219_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dc8accf4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267536005990148_219_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,99 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2496, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267536094820161_221_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267536094820161_221_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..93f1f56e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267536094820161_221_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,533 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((144 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((191 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (266 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (301 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((315 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (327 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((344 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((359 << 6) | (i6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((368 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (377 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (387 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (396 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (410 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (420 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (429 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (440 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (478 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (474 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (468 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (464 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 1, 0, 1232, 1, 0, 1248, 1, 0, 2368, 1, 0, 2384, 1, 0, 2400, 1, 0, 3268, 1, 0, 3284, 1, 0, 3300, 1, 0, 4288, 1, 0, 4304, 1, 0, 4320, 1, 0, 4736, 1, 0, 4752, 1, 0, 4768, 1, 0, 9808, 2, 0, 12928, 4, 0, 17472, 8, 0, 19264, 9, 
0, 19264, 9, 0, 30592, 3, 0, 30592, 3, 0, 30336, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267536275002478_222_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267536275002478_222_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2734acf4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267536275002478_222_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,204 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1872, 9, 0, 1872, 9, 0, 1888, 9, 0, 1888, 9, 0, 1904, 9, 0, 1904, 9, 0, 3152, 9, 0, 3152, 9, 0, 3168, 9, 0, 3168, 9, 0, 3184, 9, 0, 3184, 9, 0, 8960, 4, 0, 9536, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267536449498165_224_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267536449498165_224_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..05546fbf --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267536449498165_224_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,238 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((89 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((99 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((110 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((170 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + 
WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((177 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 
+ Data: [5696, 1, 0, 5700, 1, 0, 5712, 1, 0, 5716, 1, 0, 7040, 1, 0, 7044, 1, 0, 7056, 1, 0, 7060, 1, 0, 7936, 1, 0, 7952, 1, 0, 9600, 4, 0, 9616, 4, 0, 12288, 4, 0, 12304, 4, 0, 12736, 12, 0, 12736, 12, 0, 13376, 5, 0, 13376, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267536572437928_225_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267536572437928_225_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8c2edc49 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267536572437928_225_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 10, 0, 976, 10, 0, 992, 10, 0, 992, 10, 0, 1008, 10, 0, 1008, 10, 0, 2004, 5, 0, 2004, 5, 0, 2020, 5, 0, 2020, 5, 0, 2036, 5, 0, 2036, 5, 0, 3092, 3, 0, 3092, 3, 0, 3108, 3, 0, 3108, 3, 0, 3124, 3, 0, 3124, 3, 0, 
3796, 10, 0, 3796, 10, 0, 3812, 10, 0, 3812, 10, 0, 3828, 10, 0, 3828, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267536805474680_227_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267536805474680_227_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2f3a9865 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267536805474680_227_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,314 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | 
(i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() >= 3)) { + if 
((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((220 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((227 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((234 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter4 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # 
Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4752, 1, 0, 4756, 1, 0, 4760, 1, 0, 4768, 1, 0, 4772, 1, 0, 4776, 1, 0, 4784, 1, 0, 4788, 1, 0, 4792, 1, 0, 5904, 1, 0, 5920, 1, 0, 5936, 1, 0, 6784, 4, 0, 7232, 8, 0, 8400, 9, 0, 8400, 9, 0, 9040, 3, 0, 9040, 3, 0, 13008, 12, 0, 13008, 12, 0, 14100, 8, 0, 14996, 4, 0, 16016, 12, 0, 16016, 12, 0, 16720, 6, 0, 16720, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267537081863216_229_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267537081863216_229_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..27cec69f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267537081863216_229_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,207 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2368, 1, 0, 2384, 1, 0, 2400, 1, 0, 2944, 1, 0, 5056, 4, 0, 5504, 8, 0, 9024, 13, 0, 9024, 13, 0, 9024, 13, 0, 8640, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - 
Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267537151553529_230_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267537151553529_230_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8c418ac4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267537151553529_230_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,227 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((126 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((136 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((145 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((150 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((157 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [8064, 2, 0, 8068, 2, 0, 8080, 2, 0, 8084, 2, 0, 8096, 2, 0, 8100, 2, 0, 10048, 8, 0, 10052, 8, 0, 10064, 8, 0, 10068, 8, 0, 10080, 8, 0, 10084, 8, 0, 10496, 8, 0, 10500, 8, 0, 10512, 8, 0, 10516, 8, 0, 10528, 8, 0, 10532, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267537264687082_231_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267537264687082_231_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c93513a1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267537264687082_231_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,383 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if 
((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((280 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (315 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 7936, 8, 0, 9280, 9, 0, 9280, 9, 0, 15680, 1, 0, 16320, 5, 0, 16320, 5, 0, 16960, 1, 0, 17920, 1, 0, 17936, 1, 0, 18368, 1, 0, 18384, 1, 0, 20160, 12, 0, 20160, 12, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267537447069673_233_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267537447069673_233_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4b97cd94 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267537447069673_233_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,416 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((67 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (counter2 << 4)) | (counter3 << 2)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((126 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((133 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((140 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((147 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter2 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((266 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((288 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((299 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + break; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (325 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (335 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (344 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (349 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (353 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 201 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1488, 10, 0, 1488, 10, 0, 1504, 10, 0, 1504, 10, 0, 1520, 10, 0, 1520, 10, 0, 2580, 9, 0, 2580, 9, 0, 2584, 9, 0, 2584, 9, 0, 2588, 9, 0, 2588, 9, 0, 2596, 9, 0, 2596, 9, 0, 2600, 9, 0, 2600, 9, 0, 2604, 9, 0, 2604, 9, 0, 2612, 9, 0, 2612, 9, 0, 2616, 9, 0, 2616, 9, 0, 2620, 9, 0, 2620, 9, 0, 3476, 4, 0, 3480, 4, 0, 3484, 4, 0, 3492, 4, 0, 3496, 4, 0, 3500, 4, 0, 3508, 4, 0, 3512, 4, 0, 3516, 4, 0, 4308, 8, 0, 4312, 8, 0, 4316, 8, 0, 4324, 8, 0, 4328, 8, 0, 4332, 8, 0, 4340, 8, 0, 4344, 8, 0, 4348, 8, 0, 5264, 7, 0, 5264, 7, 0, 5264, 7, 0, 5280, 7, 0, 5280, 7, 0, 5280, 7, 0, 5296, 7, 0, 5296, 7, 0, 5296, 7, 0, 6932, 8, 0, 6936, 8, 0, 6940, 8, 0, 8532, 1, 0, 8536, 1, 0, 8540, 1, 0, 9428, 8, 0, 9432, 8, 0, 9436, 8, 0, 17024, 4, 0, 17040, 4, 0, 19136, 4, 0, 19152, 4, 0, 20224, 9, 0, 20224, 9, 0, 22336, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267538009643617_234_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267538009643617_234_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..96c301ca --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267538009643617_234_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,397 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result 
= (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 1) 
|| (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((219 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((271 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 
0)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((283 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (294 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 1)) { + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((327 << 6) | (counter6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (342 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (355 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (365 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((387 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter9 = 0; + while ((counter9 < 3)) { + counter9 = (counter9 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((409 << 6) | (counter8 << 4)) | (counter9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter9 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((427 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 4, 0, 4672, 9, 0, 4672, 9, 0, 14020, 2, 0, 14024, 2, 0, 14028, 2, 0, 14036, 2, 0, 14040, 2, 0, 14044, 2, 0, 14052, 2, 0, 14056, 2, 0, 14060, 2, 0, 27344, 4, 0, 27360, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267538177465881_235_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267538177465881_235_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..78ac0e02 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267538177465881_235_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,156 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
} + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1728, 8, 0, 4096, 8, 0, 4800, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267538236881783_236_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267538236881783_236_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..79f1793a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267538236881783_236_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,192 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - 
Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 6784, 4, 0, 9152, 14, 0, 9152, 14, 0, 9152, 14, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267538302628133_237_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267538302628133_237_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..042aea42 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267538302628133_237_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,247 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 2)) { + if 
((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((107 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((118 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: 
UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 3776, 4, 0, 6848, 8, 0, 6852, 8, 0, 6864, 8, 0, 6868, 8, 0, 6880, 8, 0, 6884, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267538366045486_238_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267538366045486_238_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7c368054 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267538366045486_238_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1152, 5, 0, 1152, 5, 0, 4672, 5, 0, 4672, 5, 0, 4688, 5, 0, 4688, 5, 0, 4704, 5, 0, 4704, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267538427011758_239_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267538427011758_239_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..61d1d5e8 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267538427011758_239_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,171 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((68 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((102 << 6) | (counter0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (counter0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 4, 0, 1056, 4, 0, 1072, 4, 0, 2704, 1, 0, 2720, 1, 0, 2736, 1, 0, 3792, 1, 0, 3796, 1, 0, 3800, 1, 0, 3808, 1, 0, 3812, 1, 0, 3816, 1, 0, 3824, 1, 0, 3828, 1, 0, 3832, 1, 0, 5520, 1, 0, 5536, 1, 0, 5552, 1, 0, 7696, 4, 0, 7712, 4, 0, 7728, 4, 0, 8144, 4, 0, 8160, 4, 0, 8176, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 
0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267538602541268_241_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267538602541268_241_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..919bc479 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267538602541268_241_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,329 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((92 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((101 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 1)) { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((234 << 6) | (counter4 << 4)) | 
(counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((266 << 6) | (counter4 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((273 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [8512, 4, 0, 8960, 8, 0, 10880, 1, 0, 10896, 1, 0, 10912, 1, 0, 12032, 1, 0, 
12048, 1, 0, 12064, 1, 0, 12736, 1, 0, 12752, 1, 0, 12768, 1, 0, 15888, 2, 0, 15904, 2, 0, 15920, 2, 0, 17044, 8, 0, 17048, 8, 0, 17060, 8, 0, 17064, 8, 0, 17076, 8, 0, 17080, 8, 0, 17488, 8, 0, 17504, 8, 0, 17520, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267538758231259_242_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267538758231259_242_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fc4b04cf --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267538758231259_242_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,92 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1152, 5, 0, 1152, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267538801407267_243_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267538801407267_243_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8b593459 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267538801407267_243_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,99 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() == 3)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 3216, 8, 0, 3232, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267538850024546_244_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267538850024546_244_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cf438c25 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267538850024546_244_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,319 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((28 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((counter1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((212 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((228 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((237 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2436, 1, 0, 2440, 1, 0, 2452, 1, 0, 2456, 1, 0, 3780, 8, 0, 3784, 8, 0, 3796, 8, 
0, 3800, 8, 0, 4672, 8, 0, 4688, 8, 0, 12608, 2, 0, 14596, 2, 0, 14600, 2, 0, 14612, 2, 0, 14616, 2, 0, 14628, 2, 0, 14632, 2, 0, 15172, 2, 0, 15176, 2, 0, 15188, 2, 0, 15192, 2, 0, 15204, 2, 0, 15208, 2, 0, 15872, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267539258321271_245_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267539258321271_245_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a403d829 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267539258321271_245_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,293 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((168 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 111 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2240, 8, 0, 3456, 8, 0, 3472, 8, 0, 3488, 8, 0, 4740, 8, 0, 4744, 8, 0, 4756, 8, 0, 4760, 8, 0, 4772, 8, 0, 4776, 8, 0, 5696, 8, 0, 9040, 8, 0, 9056, 8, 0, 9680, 8, 0, 9696, 8, 0, 10768, 8, 0, 10772, 8, 0, 10776, 8, 0, 10784, 8, 0, 10788, 8, 0, 10792, 8, 0, 11216, 1, 0, 11232, 
1, 0, 11856, 1, 0, 11872, 1, 0, 13200, 2, 0, 13216, 2, 0, 13648, 12, 0, 13648, 12, 0, 13664, 12, 0, 13664, 12, 0, 16192, 12, 0, 16192, 12, 0, 15808, 2, 0, 15168, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267539385344562_246_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267539385344562_246_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fd0c6d0f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267539385344562_246_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,190 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1664, 1, 0, 1280, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] 
+Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267539433024015_247_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267539433024015_247_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..53544e4e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267539433024015_247_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,279 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((144 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((154 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((161 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 
2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((206 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((224 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((237 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() 
== 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((244 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((251 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1920, 1, 0, 11328, 4, 0, 13188, 8, 0, 13192, 8, 0, 13196, 8, 0, 13204, 8, 0, 13208, 8, 0, 13212, 8, 0, 14340, 8, 0, 14344, 8, 0, 14348, 8, 0, 14356, 8, 0, 14360, 8, 0, 14364, 8, 0, 15172, 8, 0, 15176, 8, 0, 15180, 8, 0, 15188, 8, 0, 15192, 8, 0, 15196, 8, 0, 16068, 8, 0, 16072, 8, 0, 16076, 8, 0, 16084, 8, 0, 16088, 8, 0, 16092, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267539561721702_248_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267539561721702_248_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f82d3896 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267539561721702_248_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,398 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((62 << 6) | (counter0 << 4)) | (counter1 << 2)) | i2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((71 << 6) | (counter0 << 4)) | (counter1 << 2)) | i2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((83 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((90 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((139 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter5 = 0; + 
while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((267 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((291 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (i6 
<< 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((305 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((312 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((329 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((344 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 228 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1360, 5, 0, 1360, 5, 0, 1376, 5, 0, 1376, 5, 0, 
2260, 8, 0, 2264, 8, 0, 2268, 8, 0, 2276, 8, 0, 2280, 8, 0, 2284, 8, 0, 4564, 4, 0, 4565, 4, 0, 4566, 4, 0, 4568, 4, 0, 4569, 4, 0, 4570, 4, 0, 4572, 4, 0, 4573, 4, 0, 4574, 4, 0, 4580, 4, 0, 4581, 4, 0, 4582, 4, 0, 4584, 4, 0, 4585, 4, 0, 4586, 4, 0, 4588, 4, 0, 4589, 4, 0, 4590, 4, 0, 5780, 2, 0, 5784, 2, 0, 5788, 2, 0, 5796, 2, 0, 5800, 2, 0, 5804, 2, 0, 6736, 13, 0, 6736, 13, 0, 6736, 13, 0, 6752, 13, 0, 6752, 13, 0, 6752, 13, 0, 8000, 1, 0, 8016, 1, 0, 8032, 1, 0, 8900, 8, 0, 8916, 8, 0, 8932, 8, 0, 9348, 8, 0, 9364, 8, 0, 9380, 8, 0, 10112, 8, 0, 10128, 8, 0, 10144, 8, 0, 16448, 9, 0, 16448, 9, 0, 16464, 9, 0, 16464, 9, 0, 16480, 9, 0, 16480, 9, 0, 17088, 9, 0, 17088, 9, 0, 17104, 9, 0, 17104, 9, 0, 17120, 9, 0, 17120, 9, 0, 17984, 4, 0, 18000, 4, 0, 18016, 4, 0, 21056, 2, 0, 21072, 2, 0, 21088, 2, 0, 22016, 10, 0, 22016, 10, 0, 22032, 10, 0, 22032, 10, 0, 22048, 10, 0, 22048, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267540475879854_249_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267540475879854_249_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d13629c2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267540475879854_249_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3264, 13, 0, 3264, 13, 0, 3264, 13, 0, 2496, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267540526590047_250_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267540526590047_250_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9f734e25 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267540526590047_250_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,177 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((136 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2304, 8, 0, 2308, 8, 0, 2320, 8, 0, 2324, 8, 0, 2336, 8, 0, 2340, 8, 0, 3456, 8, 0, 3460, 8, 0, 3472, 8, 0, 3476, 8, 0, 3488, 8, 0, 3492, 8, 0, 3904, 2, 0, 3908, 2, 0, 3920, 2, 0, 3924, 2, 0, 3936, 2, 0, 3940, 2, 0, 6656, 10, 0, 6656, 10, 0, 9152, 2, 0, 9168, 2, 0, 9184, 2, 0, 10304, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267540673896723_251_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267540673896723_251_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..aff8bcd9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267540673896723_251_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,440 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + 
uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((172 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter2 == 1)) { + break; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + uint counter4 = 0; + while ((counter4 
< 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((218 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = 
(result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((289 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((315 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((330 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((341 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (348 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((371 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((390 << 6) | (i6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((400 << 6) | (i6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((409 << 6) | (i6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((414 << 6) | (i6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((421 << 6) | (i6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((432 << 6) | (i6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((447 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (451 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5120, 8, 0, 23744, 4, 0, 23760, 4, 0, 23776, 4, 0, 26496, 4, 0, 26500, 4, 0, 26504, 4, 0, 26512, 4, 0, 26516, 4, 0, 26520, 4, 0, 26528, 4, 0, 26532, 4, 0, 26536, 4, 0] + - 
Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267540950304792_253_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267540950304792_253_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..53ab3e9c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267540950304792_253_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,332 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 2)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: 
{ + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((76 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((83 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1664, 4, 0, 3264, 9, 0, 3264, 9, 0, 4880, 1, 0, 4884, 1, 0, 4888, 1, 0, 4896, 1, 0, 4900, 1, 0, 4904, 1, 0, 5328, 1, 0, 5332, 1, 0, 5336, 1, 0, 5344, 1, 0, 5348, 1, 0, 5352, 1, 0, 5776, 1, 0, 5792, 1, 0, 6656, 4, 0, 7568, 8, 0, 7584, 8, 0, 7600, 8, 0, 9232, 8, 0, 9248, 8, 0, 9264, 8, 0, 10192, 8, 0, 10208, 8, 0, 10224, 8, 0, 10640, 8, 0, 10656, 8, 0, 10672, 8, 0, 14208, 5, 0, 14208, 5, 0, 14656, 8, 0, 15872, 10, 0, 15872, 10, 0, 15888, 10, 0, 15888, 10, 0, 16448, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267541101223191_254_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267541101223191_254_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..aca01829 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267541101223191_254_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,263 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2048, 5, 0, 2048, 5, 0, 3216, 8, 0, 3232, 8, 0, 5520, 2, 0, 5536, 
2, 0, 6096, 8, 0, 6112, 8, 0, 6800, 8, 0, 6816, 8, 0, 8576, 4, 0, 9152, 5, 0, 9152, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267541180969746_255_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267541180969746_255_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b13a1b81 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267541180969746_255_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,283 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = 
(result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ } else { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((177 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((186 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: 
[1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7424, 4, 0, 8576, 8, 0, 9280, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267541233379462_256_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267541233379462_256_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fb70d8ab --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267541233379462_256_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,238 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 2)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1936, 1, 0, 1952, 1, 0, 1968, 1, 0, 4160, 4, 0, 6336, 8, 0, 8640, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267541293912437_257_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267541293912437_257_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6c120d9a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267541293912437_257_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,124 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + 
WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 13, 0, 1088, 13, 0, 1088, 13, 0, 1728, 5, 0, 1728, 5, 0, 2880, 1, 0, 4288, 9, 0, 4288, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267541342565042_258_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267541342565042_258_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..808408eb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267541342565042_258_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,233 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((i1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + 
Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 9, 0, 1088, 9, 0, 2560, 1, 0, 2564, 1, 0, 2568, 1, 0, 2576, 1, 0, 2580, 1, 0, 2584, 1, 0, 3200, 1, 0, 3204, 1, 0, 3208, 1, 0, 3216, 1, 0, 3220, 1, 0, 3224, 1, 0, 4544, 8, 0, 4548, 8, 0, 4552, 8, 0, 4560, 8, 0, 4564, 8, 0, 4568, 8, 0, 7568, 1, 0, 7584, 1, 0, 9024, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267541505176486_259_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267541505176486_259_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ead606d6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267541505176486_259_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,267 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((188 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = 
(result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 9, 0, 1216, 9, 0, 3264, 2, 0, 3840, 2, 0, 8384, 4, 0, 8400, 4, 0, 8416, 4, 0, 9984, 5, 0, 9984, 5, 0, 12052, 8, 0, 12056, 8, 0, 12060, 8, 0, 12068, 8, 0, 12072, 8, 0, 12076, 8, 0, 13440, 8, 0, 14336, 5, 0, 14336, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267541597553604_260_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267541597553604_260_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..32ce518d
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267541597553604_260_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,151 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // record log: 3 uints per record (tag, ballot.x, ballot.y)
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the shared write cursor, advanced by 3 per record
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // every lane that executes a wave op appends one record describing it
+  uint result = 0;
+  if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { // lane indices 0, 1, 3
+    if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { // lane indices 1, 3
+      result = (result + WaveActiveMin((WaveGetLaneIndex() + 3)));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // reserve 3 slots; temp receives the previous cursor value
+      _participant_bit[temp] = (27 << 6); // tag: id in bits 6 and up; not inside a loop, so the low bits stay 0
+      uint4 ballot = WaveActiveBallot(1); // mask of the lanes active at this point
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    uint counter0 = 0;
+    while ((counter0 < 2)) {
+      counter0 = (counter0 + 1); // incremented before use, so the recorded iteration values are 1 and 2
+      if ((WaveGetLaneIndex() >= 2)) {
+        result = (result + WaveActiveMax(4));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); // tag = (id << 6) | (loop iteration << 4)
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if ((WaveGetLaneIndex() >= 3)) {
+        if ((WaveGetLaneIndex() >= 2)) {
+          result = (result + WaveActiveSum(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((51 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+          if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+            result = (result + WaveActiveMin(8));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((69 << 6) | (counter0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+            result = (result + WaveActiveMin(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((80 << 6) | (counter0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+      } else { // only lane indices 0 and 1 reach this branch (outer set {0, 1, 3} minus index >= 3)
+        if ((WaveGetLaneIndex() >= 2)) { // never true here, so no record with id 87 is expected
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((87 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) {
+          if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { // never true: the enclosing if admits only indices 0 and 3
+            result = (result + WaveActiveSum(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((105 << 6) | (counter0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) {
+            result = (result + WaveActiveSum((WaveGetLaneIndex() + 5)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((118 << 6) | (counter0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+      }
+    }
+    if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { // of the outer set, indices 0 and 1
+      result = (result + WaveActiveSum((WaveGetLaneIndex() + 2)));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (135 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 42 # 42 = 14 records x 3 values, the same count as expected_bit_patterns
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1728, 10, 0, 1728, 10, 0, 2640, 8, 0, 2656, 8, 0, 3280, 8, 0, 3296, 8, 0, 4432, 8, 0, 4448, 8, 0, 5136, 8, 0, 5152, 8, 0, 7568, 1, 0, 7584, 1, 0, 8640, 3, 0, 8640, 3, 0] # 14 records of (tag, ballot.x, ballot.y)
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3 # one record = 3 uint32 values
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267541675909479_261_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267541675909479_261_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..60598757 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267541675909479_261_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,345 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
case 2: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((87 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((119 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((130 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((226 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 2)) { + break; + } + } + break; + } + case 1: { + uint counter8 = 0; + while ((counter8 < 3)) { + counter8 = (counter8 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (328 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1488, 1, 0, 1504, 1, 0, 4480, 2, 0, 4496, 2, 0, 7616, 4, 0, 7620, 4, 0, 7624, 4, 0, 7632, 
4, 0, 7636, 4, 0, 7640, 4, 0, 10816, 8, 0, 12416, 8, 0, 12432, 8, 0, 12448, 8, 0, 13120, 8, 0, 14480, 5, 0, 14480, 5, 0, 14496, 5, 0, 14496, 5, 0, 14512, 5, 0, 14512, 5, 0, 20992, 9, 0, 20992, 9, 0, 20608, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267542319100711_263_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267542319100711_263_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..06090f05 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267542319100711_263_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,238 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((160 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((181 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((192 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 5, 0, 832, 5, 0, 1472, 1, 0, 4608, 4, 0, 7808, 1, 0, 7824, 1, 0, 7840, 1, 0, 8640, 1, 0, 8656, 1, 0, 8672, 1, 0, 9344, 1, 0, 9360, 1, 0, 9376, 1, 0, 10256, 8, 0, 10272, 8, 0, 11600, 8, 0, 11604, 8, 0, 11616, 8, 0, 11620, 8, 0, 12304, 8, 0, 12308, 8, 0, 12320, 8, 0, 12324, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267542457690721_264_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267542457690721_264_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e95efab6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267542457690721_264_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,728 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((97 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (325 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = ((350 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((365 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (376 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((409 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((416 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + 
WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (429 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (436 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (443 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (453 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (471 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (481 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (490 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (495 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (499 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (510 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (515 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + uint counter8 = 0; + while ((counter8 < 3)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((533 << 6) | (counter8 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((551 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter9 = 0; + while ((counter9 < 2)) { + counter9 = (counter9 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((565 << 6) | (counter8 << 4)) | (counter9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((572 << 6) | (counter8 << 4)) | (counter9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((583 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((592 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (616 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (627 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (636 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (645 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i10 = 0; (i10 < 3); i10 = (i10 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((662 << 6) | (i10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i11 = 0; (i11 < 3); i11 = (i11 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((681 << 6) | (i11 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i12 = 0; (i12 < 2); i12 = (i12 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((700 << 6) | (i11 << 4)) | (i12 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((711 << 6) | (i11 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter13 = 0; + while ((counter13 < 2)) { + counter13 = (counter13 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((729 << 6) | (counter13 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= ((747 << 6) | (counter13 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((754 << 6) | (counter13 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((761 << 6) | (counter13 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((772 << 6) | (counter13 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 2448, 1, 0, 2464, 1, 0, 2480, 1, 0, 3840, 1, 0, 11520, 8, 0, 13376, 5, 0, 13376, 5, 0, 17472, 1, 0, 28992, 1, 0, 37904, 4, 0, 37920, 4, 0, 37936, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267542675014770_265_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267542675014770_265_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d7f3d7ee --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267542675014770_265_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,213 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 
20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 4, 0, 1920, 8, 0, 3008, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + 
- Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267542729712672_266_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267542729712672_266_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e1848dc8 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267542729712672_266_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,144 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= ((121 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 5, 0, 1024, 5, 0, 1040, 5, 0, 1040, 5, 0, 2304, 13, 0, 2304, 13, 0, 2304, 13, 0, 2320, 13, 0, 2320, 13, 0, 2320, 13, 0, 3264, 4, 0, 3268, 4, 0, 3272, 4, 0, 3280, 4, 0, 3284, 4, 0, 3288, 4, 0, 3968, 9, 0, 3968, 9, 0, 3984, 9, 0, 3984, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267542910035005_268_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267542910035005_268_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..67644bae --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267542910035005_268_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,165 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1792, 5, 0, 1792, 5, 0, 3008, 5, 0, 
3008, 5, 0, 3648, 9, 0, 3648, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267542965224817_269_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267542965224817_269_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..44e12fe6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267542965224817_269_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,211 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((30 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = 
(result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((129 << 6) | (counter3 << 4)) | (i4 << 2)) | counter5); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((140 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((155 << 6) | (counter3 << 4)) | (i4 << 2)) | i6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((166 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 1)) { + continue; 
+ } + if ((i4 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 189 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 1, 0, 1056, 1, 0, 1940, 8, 0, 1944, 8, 0, 1948, 8, 0, 1956, 8, 0, 1960, 8, 0, 1964, 8, 0, 4288, 4, 0, 5200, 2, 0, 5216, 2, 0, 5232, 2, 0, 6992, 9, 0, 6992, 9, 0, 6996, 9, 0, 6996, 9, 0, 7008, 9, 0, 7008, 9, 0, 7012, 9, 0, 7012, 9, 0, 7024, 9, 0, 7024, 9, 0, 7028, 9, 0, 7028, 9, 0, 8273, 9, 0, 8273, 9, 0, 8274, 9, 0, 8274, 9, 0, 8277, 9, 0, 8277, 9, 0, 8278, 9, 0, 8278, 9, 0, 8289, 9, 0, 8289, 9, 0, 8290, 9, 0, 8290, 9, 0, 8293, 9, 0, 8293, 9, 0, 8294, 9, 0, 8294, 9, 0, 8305, 9, 0, 8305, 9, 0, 8306, 9, 0, 8306, 9, 0, 8309, 9, 0, 8309, 9, 0, 8310, 9, 0, 8310, 9, 0, 8976, 4, 0, 8980, 4, 0, 8992, 4, 0, 8996, 4, 0, 9008, 4, 0, 9012, 4, 0, 10640, 2, 0, 10644, 2, 0, 10656, 2, 0, 10660, 2, 0, 10672, 2, 0, 10676, 2, 0, 11472, 4, 0, 11488, 4, 0, 11504, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267543625022901_270_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267543625022901_270_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..61dbb885 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267543625022901_270_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,281 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 
63 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 4, 0, 2112, 1, 0, 4480, 4, 0, 4496, 4, 0, 4512, 4, 0, 5508, 4, 0, 5512, 4, 0, 5524, 4, 0, 5528, 4, 0, 5540, 4, 0, 5544, 4, 0, 6208, 4, 0, 6224, 4, 0, 6240, 4, 0, 11200, 8, 0, 14336, 12, 0, 14336, 12, 0, 13952, 2, 0, 13312, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267543792903008_272_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267543792903008_272_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..62f2beec --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267543792903008_272_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,195 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 2)) { 
+ if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 
0, 576, 5, 0, 1344, 8, 0, 2560, 8, 0, 2576, 8, 0, 2592, 8, 0, 3716, 8, 0, 3720, 8, 0, 3732, 8, 0, 3736, 8, 0, 3748, 8, 0, 3752, 8, 0, 4420, 8, 0, 4424, 8, 0, 4436, 8, 0, 4440, 8, 0, 4452, 8, 0, 4456, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267543858794417_273_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267543858794417_273_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..072572fb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267543858794417_273_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,228 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if ((WaveGetLaneIndex() == 0)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result 
+ WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((154 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } 
+ } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1536, 1, 0, 1552, 1, 0, 2688, 5, 0, 2688, 5, 0, 3136, 2, 0, 5520, 1, 0, 5536, 1, 0, 8640, 4, 0, 8656, 4, 0, 8672, 4, 0, 11456, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267543935099702_274_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267543935099702_274_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c5a135b1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267543935099702_274_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,549 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result 
+ WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((260 << 6) | (counter2 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((278 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((285 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((292 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (333 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (342 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (349 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (358 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (363 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (370 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (379 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((401 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (410 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (414 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (421 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (425 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (435 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((460 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((475 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((490 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((507 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (514 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1984, 1, 0, 2000, 1, 0, 2688, 1, 0, 2704, 1, 0, 3328, 1, 0, 15232, 4, 0, 17808, 8, 0, 17824, 8, 0, 19328, 5, 0, 19328, 5, 0, 20480, 8, 0, 23680, 8, 0, 29444, 4, 0, 29448, 4, 0, 29460, 4, 0, 29464, 4, 0, 29476, 4, 0, 29480, 4, 0, 32452, 4, 0, 32456, 4, 0, 32468, 4, 0, 32472, 4, 0, 32484, 4, 0, 32488, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267544182199727_277_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267544182199727_277_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a4a0a7ab --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267544182199727_277_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,228 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) 
== 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((162 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 
<< 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 87 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3840, 9, 0, 3840, 9, 0, 3456, 2, 0, 2432, 4, 0, 5248, 1, 0, 4992, 10, 0, 4992, 10, 0, 6336, 10, 0, 6336, 10, 0, 6352, 10, 0, 6352, 10, 0, 7232, 10, 0, 7232, 10, 0, 7248, 10, 0, 7248, 10, 0, 7808, 10, 0, 7808, 10, 0, 7824, 10, 0, 7824, 10, 0, 8512, 1, 0, 8528, 1, 0, 9152, 1, 0, 9168, 1, 0, 10880, 4, 0, 10896, 4, 0, 11712, 5, 0, 11712, 5, 0, 11728, 5, 0, 11728, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267544253052767_278_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267544253052767_278_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8a9310b3 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267544253052767_278_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,227 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + 
result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7232, 4, 0, 7680, 8, 0, 11072, 5, 0, 11072, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267544532987158_281_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267544532987158_281_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5742d4f0 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267544532987158_281_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,274 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 
1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 3)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((100 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1536, 2, 0, 1552, 2, 0, 1856, 4, 0, 2496, 1, 0, 5248, 4, 0, 5264, 4, 0, 8064, 8, 0, 9472, 9, 0, 9472, 9, 0, 10112, 9, 0, 10112, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267544607111290_282_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267544607111290_282_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c70c8b23 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267544607111290_282_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,260 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) 
== 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((158 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((184 << 6) | (counter1 << 4)) | 
(i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((199 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((214 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((221 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2368, 1, 0, 3648, 1, 0, 7936, 2, 0, 7952, 2, 0, 7968, 2, 0, 8704, 4, 0, 10128, 8, 0, 10132, 8, 0, 10144, 8, 0, 10148, 8, 0, 10160, 8, 0, 10164, 8, 0, 13712, 8, 0, 13716, 8, 0, 13728, 8, 0, 13732, 8, 0, 13744, 8, 0, 13748, 8, 0, 14160, 8, 0, 14164, 8, 0, 14176, 8, 0, 14180, 8, 0, 14192, 8, 0, 14196, 8, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267544696559340_283_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267544696559340_283_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..91ea278d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267544696559340_283_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,210 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (counter0 << 4)) | 
(i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter2 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 3408, 2, 0, 3412, 2, 0, 3424, 2, 0, 3428, 2, 0, 3440, 2, 0, 3444, 2, 0, 5904, 4, 0, 5920, 4, 0, 5936, 4, 0, 12032, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267544775586616_284_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267544775586616_284_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8d6515d3 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267544775586616_284_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,140 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 3072, 5, 0, 3072, 5, 0, 3712, 5, 0, 3712, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267544822904253_285_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267544822904253_285_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dbdd8540 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267544822904253_285_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,236 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((54 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((93 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((119 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((134 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
(((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((145 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 75 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 1, 0, 1056, 1, 0, 1072, 1, 0, 2192, 4, 0, 2208, 4, 0, 2224, 4, 0, 5952, 8, 0, 5956, 8, 0, 5960, 8, 0, 5968, 8, 0, 5972, 8, 0, 5976, 8, 0, 5984, 8, 0, 5988, 8, 0, 5992, 8, 0, 9984, 8, 0, 10000, 8, 0, 10016, 8, 0, 11200, 5, 0, 11200, 5, 0, 11216, 5, 0, 11216, 5, 0, 11840, 1, 0, 12736, 4, 0, 13184, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267544980434410_286_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267544980434410_286_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d5b41e80 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267544980434410_286_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,191 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2688, 8, 0, 3584, 4, 0, 6784, 13, 0, 6784, 13, 0, 6784, 13, 0, 6144, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267545131855289_288_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267545131855289_288_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5f164072 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267545131855289_288_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,371 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + for (uint i2 = 0; (i2 < 
3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((237 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((247 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((273 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((284 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((293 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((317 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((339 << 6) | (counter6 << 4)) | (counter7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((356 << 6) | (counter6 << 4)) | (counter7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (365 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (369 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5136, 1, 0, 6224, 1, 0, 9216, 4, 0, 13824, 8, 0, 15168, 5, 0, 15168, 5, 0, 15184, 5, 0, 15184, 5, 0, 17472, 1, 0, 17476, 1, 0, 17480, 1, 0, 17488, 1, 0, 17492, 1, 0, 17496, 1, 0, 18176, 1, 0, 18192, 1, 0, 18752, 5, 0, 18752, 5, 0, 18768, 5, 0, 18768, 5, 0, 20304, 5, 0, 20304, 5, 0, 20320, 5, 0, 20320, 5, 0, 20336, 5, 0, 20336, 5, 0, 21716, 4, 0, 21720, 4, 0, 21724, 4, 0, 
21732, 4, 0, 21736, 4, 0, 21740, 4, 0, 21748, 4, 0, 21752, 4, 0, 21756, 4, 0, 22804, 1, 0, 22808, 1, 0, 22812, 1, 0, 22820, 1, 0, 22824, 1, 0, 22828, 1, 0, 22836, 1, 0, 22840, 1, 0, 22844, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267545361003073_289_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267545361003073_289_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c6652ac6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267545361003073_289_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,147 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 1, 0, 1040, 1, 0, 2176, 9, 0, 2176, 9, 0, 2192, 9, 0, 2192, 9, 0, 2880, 9, 0, 2880, 9, 0, 2896, 9, 0, 2896, 9, 0, 4048, 9, 0, 4048, 9, 0, 4064, 9, 0, 4064, 9, 0, 5136, 4, 0, 5152, 4, 0, 6416, 6, 0, 6416, 6, 0, 6432, 6, 0, 6432, 6, 0, 7120, 9, 0, 7120, 9, 0, 7136, 9, 0, 7136, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267545456538922_290_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267545456538922_290_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..19c259c4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267545456538922_290_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,450 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((235 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch 
((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((245 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((254 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((259 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((266 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((270 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((counter2 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((288 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((322 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((340 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((355 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
+ if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((366 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((373 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (377 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [13648, 2, 0, 18448, 2, 0, 18944, 4, 0, 20612, 8, 0, 20616, 8, 0, 20628, 8, 0, 20632, 8, 0, 20644, 8, 0, 20648, 8, 0, 21764, 8, 0, 21768, 8, 0, 21780, 8, 0, 21784, 8, 0, 21796, 8, 0, 21800, 8, 0, 23872, 8, 0, 23888, 8, 0, 23904, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 
1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267545557885162_291_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267545557885162_291_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..f7471ae7
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267545557885162_291_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,104 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // record log: 3 uints per lane per executed wave op
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // [0] is the shared write cursor into _participant_bit
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // every lane that reaches a wave op appends one record for it
+  uint result = 0; // only fed into later wave ops; never written to a buffer
+  switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp); // reserve 3 words; temp receives the record's start index
+        _participant_bit[temp] = (9 << 6); // word 0: wave-op id << 6 (enclosing loop counters, if any, are OR'd into the low bits)
+        uint4 ballot = WaveActiveBallot(1); // bitmask of the lanes executing this op together
+        _participant_bit[(temp + 1)] = ballot.x; // word 1: low 32 ballot bits
+        _participant_bit[(temp + 2)] = ballot.y; // word 2: next 32 ballot bits
+      }
+      break;
+    }
+    case 1: {
+      uint counter0 = 0;
+      while ((counter0 < 2)) {
+        counter0 = (counter0 + 1);
+        if (((WaveGetLaneIndex() & 1) == 1)) {
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((25 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) {
+          if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) {
+            result = (result + WaveActiveSum(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((48 << 6) | (counter0 << 4)) | (i1 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((i1 == 1)) {
+            continue;
+          }
+        }
+        if ((counter0 == 1)) {
+          break;
+        }
+      }
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 18
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [576, 5, 0, 576, 5, 0, 1616, 10, 0, 1616, 10, 0, 3088, 2, 0, 3092, 2, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267545619302372_292_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267545619302372_292_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..314d8d1d
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267545619302372_292_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,104 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // record log: 3 uints per lane per executed wave op
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // [0] is the shared write cursor into _participant_bit
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // every lane that reaches a wave op appends one record for it
+  uint result = 0; // only fed into later wave ops; never written to a buffer
+  if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
+    if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) {
+      result = (result + WaveActiveSum((WaveGetLaneIndex() + 4)));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // reserve 3 words; temp receives the record's start index
+      _participant_bit[temp] = (19 << 6); // word 0: wave-op id << 6 (enclosing loop counters, if any, are OR'd into the low bits)
+      uint4 ballot = WaveActiveBallot(1); // bitmask of the lanes executing this op together
+      _participant_bit[(temp + 1)] = ballot.x; // word 1: low 32 ballot bits
+      _participant_bit[(temp + 2)] = ballot.y; // word 2: next 32 ballot bits
+    }
+    for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) {
+      for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) {
+        if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) {
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (i1 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() & 1) == 0)) {
+          if (((WaveGetLaneIndex() & 1) == 1)) {
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((64 << 6) | (i0 << 4)) | (i1 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((79 << 6) | (i0 << 4)) | (i1 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((i1 == 1)) {
+          break;
+        }
+      }
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 27
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1216, 8, 0, 3200, 10, 0, 3200, 10, 0, 3204, 10, 0, 3204, 10, 0, 3216, 10, 0, 3216, 10, 0, 3220, 10, 0, 3220, 10, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267545689031556_293_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267545689031556_293_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..d37b7d9b
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267545689031556_293_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,400 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // record log: 3 uints per lane per executed wave op
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // [0] is the shared write cursor into _participant_bit
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // every lane that reaches a wave op appends one record for it
+  uint result = 0; // only fed into later wave ops; never written to a buffer
+  for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) {
+    if ((WaveGetLaneIndex() == 3)) {
+      result = (result + WaveActiveMin(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // reserve 3 words; temp receives the record's start index
+      _participant_bit[temp] = ((14 << 6) | (i0 << 4)); // word 0: wave-op id << 6, with enclosing loop counters OR'd into the low bits
+      uint4 ballot = WaveActiveBallot(1); // bitmask of the lanes executing this op together
+      _participant_bit[(temp + 1)] = ballot.x; // word 1: low 32 ballot bits
+      _participant_bit[(temp + 2)] = ballot.y; // word 2: next 32 ballot bits
+    }
+    if ((WaveGetLaneIndex() >= 2)) {
+      switch ((WaveGetLaneIndex() % 3)) {
+        case 0: {
+          if ((WaveGetLaneIndex() < 8)) {
+            result = (result + WaveActiveSum(1));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((27 << 6) | (i0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 1: {
+          if (((WaveGetLaneIndex() & 1) == 0)) {
+            if (((WaveGetLaneIndex() & 1) == 0)) {
+              result = (result + WaveActiveMin(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = ((41 << 6) | (i0 << 4));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          break;
+        }
+        case 2: {
+          if (((WaveGetLaneIndex() & 1) == 0)) {
+            if (((WaveGetLaneIndex() & 1) == 1)) {
+              result = (result + WaveActiveMax(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = ((55 << 6) | (i0 << 4));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            if (((WaveGetLaneIndex() & 1) == 1)) {
+              result = (result + WaveActiveMin(WaveGetLaneIndex()));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = ((64 << 6) | (i0 << 4));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          break;
+        }
+      }
+      if ((WaveGetLaneIndex() < 2)) {
+        result = (result + WaveActiveMax(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((71 << 6) | (i0 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    } else {
+      if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+        result = (result + WaveActiveMin((WaveGetLaneIndex() + 5)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((84 << 6) | (i0 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      uint counter1 = 0;
+      while ((counter1 < 2)) {
+        counter1 = (counter1 + 1);
+        if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) {
+          result = (result + WaveActiveMin((WaveGetLaneIndex() + 3)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((108 << 6) | (i0 << 4)) | (counter1 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        uint counter2 = 0;
+        while ((counter2 < 2)) {
+          counter2 = (counter2 + 1);
+          if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) {
+            result = (result + WaveActiveSum(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((((126 << 6) | (i0 << 4)) | (counter1 << 2)) | counter2);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((counter1 == 1)) {
+          break;
+        }
+      }
+      if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+        result = (result + WaveActiveMax(8));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((140 << 6) | (i0 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    if ((WaveGetLaneIndex() == 3)) {
+      result = (result + WaveActiveMin(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = ((147 << 6) | (i0 << 4));
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+  switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 1)) {
+        if ((WaveGetLaneIndex() < 2)) {
+          result = (result + WaveActiveMin(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (160 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        uint counter3 = 0;
+        while ((counter3 < 2)) {
+          counter3 = (counter3 + 1);
+          if ((WaveGetLaneIndex() == 0)) {
+            if ((WaveGetLaneIndex() == 2)) {
+              result = (result + WaveActiveSum(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = ((177 << 6) | (counter3 << 4));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            if ((WaveGetLaneIndex() == 0)) {
+              result = (result + WaveActiveMax((WaveGetLaneIndex() + 5)));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = ((186 << 6) | (counter3 << 4));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          if (((WaveGetLaneIndex() & 1) == 0)) {
+            result = (result + WaveActiveSum(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((195 << 6) | (counter3 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((counter3 == 1)) {
+            break;
+          }
+        }
+        if ((WaveGetLaneIndex() < 2)) {
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (205 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (214 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    default: {
+      result = (result + WaveActiveSum(99));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (218 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+      break;
+    }
+  }
+  switch ((WaveGetLaneIndex() % 4)) {
+    case 0: {
+      switch ((WaveGetLaneIndex() % 3)) {
+        case 0: {
+          if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) {
+            if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) {
+              result = (result + WaveActiveMin(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (250 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) {
+              result = (result + WaveActiveMin(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (265 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          break;
+        }
+        case 1: {
+          if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+            if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+              result = (result + WaveActiveMax(WaveGetLaneIndex()));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (283 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          break;
+        }
+        case 2: {
+          if ((WaveGetLaneIndex() == 3)) {
+            if ((WaveGetLaneIndex() == 0)) {
+              result = (result + WaveActiveMin(WaveGetLaneIndex()));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (293 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          break;
+        }
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (302 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 2: {
+      if (true) {
+        result = (result + WaveActiveSum(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (307 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 3: {
+      for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) {
+        if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((326 << 6) | (i4 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        uint counter5 = 0;
+        while ((counter5 < 3)) {
+          counter5 = (counter5 + 1);
+          if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) {
+            result = (result + WaveActiveMax(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((348 << 6) | (i4 << 4)) | (counter5 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) {
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((363 << 6) | (i4 << 4)) | (counter5 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((374 << 6) | (i4 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((i4 == 1)) {
+          break;
+        }
+      }
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+
Format: UInt32 + Stride: 4 + Fill: 0 + Size: 135 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 8, 0, 912, 8, 0, 928, 8, 0, 1728, 8, 0, 1744, 8, 0, 1760, 8, 0, 5376, 1, 0, 5392, 1, 0, 5408, 1, 0, 6916, 2, 0, 6932, 2, 0, 6948, 2, 0, 8069, 2, 0, 8070, 2, 0, 8085, 2, 0, 8086, 2, 0, 8101, 2, 0, 8102, 2, 0, 8960, 1, 0, 8976, 1, 0, 8992, 1, 0, 9408, 8, 0, 9424, 8, 0, 9440, 8, 0, 10240, 1, 0, 11920, 1, 0, 12496, 1, 0, 13120, 1, 0, 19648, 4, 0, 20864, 8, 0, 20880, 8, 0, 22276, 8, 0, 22280, 8, 0, 22284, 8, 0, 22292, 8, 0, 22296, 8, 0, 22300, 8, 0, 23236, 8, 0, 23240, 8, 0, 23244, 8, 0, 23252, 8, 0, 23256, 8, 0, 23260, 8, 0, 23936, 8, 0, 23952, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267546069810461_294_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267546069810461_294_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..2c26dcad
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267546069810461_294_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,195 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // record log: 3 uints per lane per executed wave op
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // [0] is the shared write cursor into _participant_bit
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // every lane that reaches a wave op appends one record for it
+  uint result = 0; // only fed into later wave ops; never written to a buffer
+  switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp); // reserve 3 words; temp receives the record's start index
+        _participant_bit[temp] = (9 << 6); // word 0: wave-op id << 6 (enclosing loop counters, if any, are OR'd into the low bits)
+        uint4 ballot = WaveActiveBallot(1); // bitmask of the lanes executing this op together
+        _participant_bit[(temp + 1)] = ballot.x; // word 1: low 32 ballot bits
+        _participant_bit[(temp + 2)] = ballot.y; // word 2: next 32 ballot bits
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (18 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    default: {
+      result = (result + WaveActiveSum(99));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (22 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+      break;
+    }
+  }
+  if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+    result = (result + WaveActiveSum((WaveGetLaneIndex() + 1)));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp);
+    _participant_bit[temp] = (44 << 6);
+    uint4 ballot = WaveActiveBallot(1);
+    _participant_bit[(temp + 1)] = ballot.x;
+    _participant_bit[(temp + 2)] = ballot.y;
+  } else {
+    if ((WaveGetLaneIndex() == 1)) {
+      result = (result + WaveActiveMin((WaveGetLaneIndex() + 2)));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (38 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+  uint counter0 = 0;
+  while ((counter0 < 2)) {
+    counter0 = (counter0 + 1);
+    if ((WaveGetLaneIndex() >= 2)) {
+      result = (result + WaveActiveSum(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = ((58 << 6) | (counter0 << 4));
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    uint counter1 = 0;
+    while ((counter1 < 3)) {
+      counter1 = (counter1 + 1);
+      if ((WaveGetLaneIndex() == 1)) {
+        result = (result + WaveActiveMax(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (((72 << 6) | (counter0 << 4)) | (counter1 << 2));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      switch ((WaveGetLaneIndex() % 3)) {
+        case 0: {
+          if ((WaveGetLaneIndex() < 8)) {
+            result = (result + WaveActiveSum(1));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((82 << 6) | (counter0 << 4)) | (counter1 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 1: {
+          if (((WaveGetLaneIndex() % 2) == 0)) {
+            result = (result + WaveActiveSum(2));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((91 << 6) | (counter0 << 4)) | (counter1 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 2: {
+          if (true) {
+            result = (result + WaveActiveSum(3));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((96 << 6) | (counter0 << 4)) | (counter1 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        default: {
+          result = (result + WaveActiveSum(99));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((100 << 6) | (counter0 << 4)) | (counter1 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+          break;
+        }
+      }
+      if ((WaveGetLaneIndex() == 2)) {
+        result = (result + WaveActiveSum(WaveGetLaneIndex()));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (((107 << 6) | (counter0 << 4)) | (counter1 << 2));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 117
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [576, 5, 0, 576, 5, 0, 2816, 9, 0, 2816, 9, 0, 2432, 2, 0, 3728, 12, 0, 3728, 12, 0, 3744, 12, 0, 3744, 12, 0, 4628, 2, 0, 4632, 2, 0, 4636, 2, 0, 4644, 2, 0, 4648, 2, 0, 4652, 2, 0, 5268, 9, 0, 5268, 9, 0, 5272, 9, 0, 5272, 9, 0, 5276, 9, 0, 5276, 9, 0, 5284, 9, 0, 5284, 9, 0, 5288, 9, 0, 5288, 9, 0, 5292, 9, 0, 5292, 9, 0, 6164, 4, 0, 6168, 4, 0, 6172, 4, 0, 6180, 4, 0, 6184, 4, 0, 6188, 4, 0, 6868, 4, 0, 6872, 4, 0, 6876, 4, 0, 6884, 4, 0, 6888, 4, 0, 6892, 4, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+
Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267546303050436_295_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267546303050436_295_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..b314664b
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267546303050436_295_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,135 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // record log: 3 uints per lane per executed wave op
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // [0] is the shared write cursor into _participant_bit
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // every lane that reaches a wave op appends one record for it
+  uint result = 0; // only fed into later wave ops; never written to a buffer
+  if (((WaveGetLaneIndex() & 1) == 0)) {
+    result = (result + WaveActiveSum((WaveGetLaneIndex() + 1)));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp); // reserve 3 words; temp receives the record's start index
+    _participant_bit[temp] = (50 << 6); // word 0: wave-op id << 6
+    uint4 ballot = WaveActiveBallot(1); // bitmask of the lanes executing this op together
+    _participant_bit[(temp + 1)] = ballot.x; // word 1: low 32 ballot bits
+    _participant_bit[(temp + 2)] = ballot.y; // word 2: next 32 ballot bits
+  } else {
+    if ((WaveGetLaneIndex() < 1)) {
+      result = (result + WaveActiveMin(2));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (44 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    } else {
+      if ((WaveGetLaneIndex() >= 2)) {
+        result = (result + WaveActiveMax((WaveGetLaneIndex() + 3)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (40 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      } else {
+        if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) {
+          result = (result + WaveActiveSum(4));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (34 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        } else {
+          if (((WaveGetLaneIndex() & 1) == 0)) {
+            result = (result + WaveActiveMin(5));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (30 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+      }
+    }
+  }
+  if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+    result = (result + WaveActiveSum(1));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp);
+    _participant_bit[temp] = (85 << 6);
+    uint4 ballot = WaveActiveBallot(1);
+    _participant_bit[(temp + 1)] = ballot.x;
+    _participant_bit[(temp + 2)] = ballot.y;
+  } else {
+    if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+      result = (result + WaveActiveMin((WaveGetLaneIndex() + 2)));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (81 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    } else {
+      if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+        result = (result + WaveActiveMax(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (75 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for
4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 18
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [3200, 5, 0, 3200, 5, 0, 2560, 8, 0, 2176, 2, 0, 5440, 9, 0, 5440, 9, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267546359157310_296_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267546359157310_296_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..5a52113d
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267546359157310_296_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,191 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // record log: 3 uints per lane per executed wave op
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // [0] is the shared write cursor into _participant_bit
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // every lane that reaches a wave op appends one record for it
+  uint result = 0; // only fed into later wave ops; never written to a buffer
+  switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      switch ((WaveGetLaneIndex() % 4)) {
+        case 0: {
+          if ((WaveGetLaneIndex() < 8)) {
+            result = (result + WaveActiveSum(1));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp); // reserve 3 words; temp receives the record's start index
+            _participant_bit[temp] = (12 << 6); // word 0: wave-op id << 6 (enclosing loop counters, if any, are OR'd into the low bits)
+            uint4 ballot = WaveActiveBallot(1); // bitmask of the lanes executing this op together
+            _participant_bit[(temp + 1)] = ballot.x; // word 1: low 32 ballot bits
+            _participant_bit[(temp + 2)] = ballot.y; // word 2: next 32 ballot bits
+          }
+          break;
+        }
+        case 1: {
+          switch ((WaveGetLaneIndex() % 3)) {
+            case 0: {
+              if ((WaveGetLaneIndex() < 8)) {
+                result = (result + WaveActiveSum(1));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (22 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+            case 1: {
+              if ((WaveGetLaneIndex() == 3)) {
+                if ((WaveGetLaneIndex() == 1)) {
+                  result = (result + WaveActiveMin((WaveGetLaneIndex() + 5)));
+                  uint temp = 0;
+                  InterlockedAdd(_wave_op_index[0], 3, temp);
+                  _participant_bit[temp] = (34 << 6);
+                  uint4 ballot = WaveActiveBallot(1);
+                  _participant_bit[(temp + 1)] = ballot.x;
+                  _participant_bit[(temp + 2)] = ballot.y;
+                }
+                if ((WaveGetLaneIndex() == 3)) {
+                  result = (result + WaveActiveMax(result));
+                  uint temp = 0;
+                  InterlockedAdd(_wave_op_index[0], 3, temp);
+                  _participant_bit[temp] = (41 << 6);
+                  uint4 ballot = WaveActiveBallot(1);
+                  _participant_bit[(temp + 1)] = ballot.x;
+                  _participant_bit[(temp + 2)] = ballot.y;
+                }
+              }
+              break;
+            }
+            case 2: {
+              if ((WaveGetLaneIndex() >= 3)) {
+                if ((WaveGetLaneIndex() >= 2)) {
+                  result = (result + WaveActiveMin(WaveGetLaneIndex()));
+                  uint temp = 0;
+                  InterlockedAdd(_wave_op_index[0], 3, temp);
+                  _participant_bit[temp] = (51 << 6);
+                  uint4 ballot = WaveActiveBallot(1);
+                  _participant_bit[(temp + 1)] = ballot.x;
+                  _participant_bit[(temp + 2)] = ballot.y;
+                }
+              } else {
+                if (((WaveGetLaneIndex() & 1) == 1)) {
+                  result = (result + WaveActiveMax((WaveGetLaneIndex() + 1)));
+                  uint temp = 0;
+                  InterlockedAdd(_wave_op_index[0], 3, temp);
+                  _participant_bit[temp] = (62 << 6);
+                  uint4 ballot = WaveActiveBallot(1);
+                  _participant_bit[(temp + 1)] = ballot.x;
+                  _participant_bit[(temp + 2)] = ballot.y;
+                }
+              }
+              break;
+            }
+          }
+          break;
+        }
+        case 2: {
+          for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) {
+            if ((WaveGetLaneIndex() < 2)) {
+              result = (result + WaveActiveMax((WaveGetLaneIndex() + 2)));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = ((79 << 6) | (i0 << 4));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            uint counter1 = 0;
+            while ((counter1 < 2)) {
+              counter1 = (counter1 + 1);
+              if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+                result = (result + WaveActiveMax(WaveGetLaneIndex()));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (((97 << 6) | (i0 << 4)) | (counter1 << 2));
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+            }
+          }
+          break;
+        }
+        case 3: {
+          if ((WaveGetLaneIndex() < 20)) {
+            result = (result + WaveActiveSum(4));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (104 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (113 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 15
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [768, 1, 0, 6212, 4, 0, 6216, 4, 0, 6228, 4, 0, 6232, 4, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space:
0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267546422751022_297_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267546422751022_297_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0ebebdc8 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267546422751022_297_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,422 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((172 << 6) | (counter0 << 4)) | 
(i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((235 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((260 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (332 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (328 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (342 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((361 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((377 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((391 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 1, 0, 3072, 1, 0, 3712, 1, 0, 4288, 1, 0, 5568, 1, 0, 12864, 4, 0, 14016, 12, 0, 14016, 12, 0, 15056, 8, 0, 15072, 8, 0, 15088, 8, 0, 17344, 8, 0, 21248, 5, 0, 21248, 5, 0, 20992, 8, 0, 20224, 2, 0, 21888, 5, 0, 21888, 5, 0, 23104, 8, 0, 23120, 8, 0, 24132, 10, 0, 24132, 10, 0, 24136, 10, 0, 24136, 10, 0, 24140, 10, 0, 24140, 10, 0, 24148, 10, 0, 24148, 10, 0, 24152, 10, 0, 24152, 10, 0, 24156, 10, 0, 24156, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267546589933385_298_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267546589933385_298_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..91e23eb6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267546589933385_298_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,388 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((127 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 2)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((236 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((247 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((265 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((292 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((299 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((319 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((328 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((343 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6928, 1, 0, 6944, 1, 0, 8144, 1, 0, 8148, 1, 0, 8160, 1, 0, 8164, 1, 0, 9600, 2, 0, 9920, 4, 0, 19152, 8, 0, 
20432, 1, 0, 21008, 5, 0, 21008, 5, 0, 21968, 5, 0, 21968, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267546695152949_299_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267546695152949_299_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..446404d2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267546695152949_299_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,339 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + break; + } + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((167 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((176 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((195 << 6) | (counter0 << 4)) | (counter1 << 2)) | i2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((206 << 6) | (counter0 << 4)) | (counter1 << 2)) | i2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (((217 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((264 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((282 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((293 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter4 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((303 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 177 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 8448, 4, 0, 8896, 8, 0, 10708, 9, 0, 10708, 9, 0, 10712, 9, 0, 10712, 9, 0, 10724, 9, 0, 10724, 9, 0, 10728, 9, 0, 10728, 9, 0, 10740, 9, 0, 10740, 9, 0, 10744, 9, 0, 10744, 9, 0, 13204, 4, 0, 13205, 4, 0, 13208, 4, 0, 13209, 4, 0, 13220, 4, 0, 13221, 4, 0, 13224, 4, 0, 13225, 4, 0, 13236, 4, 0, 13237, 4, 0, 13240, 4, 0, 13241, 4, 0, 13908, 5, 0, 13908, 5, 0, 13912, 5, 0, 13912, 5, 0, 13924, 5, 0, 13924, 5, 0, 13928, 5, 0, 13928, 5, 0, 13940, 5, 0, 13940, 5, 0, 13944, 5, 0, 13944, 5, 0, 14864, 10, 0, 14864, 10, 0, 14880, 10, 0, 14880, 10, 0, 14896, 10, 0, 14896, 10, 0, 15760, 1, 0, 15776, 1, 0, 16916, 9, 0, 16916, 9, 0, 16932, 9, 0, 16932, 9, 0, 18068, 9, 0, 18068, 9, 0, 18084, 9, 0, 18084, 9, 0, 18772, 1, 0, 18788, 1, 0, 19408, 2, 0, 19424, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - 
Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267547129614376_300_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267547129614376_300_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..622ee852 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267547129614376_300_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,215 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 2)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } 
+ } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2880, 4, 0, 2896, 4, 0, 4416, 4, 0, 4432, 4, 0, 4864, 4, 0, 4880, 4, 0, 5824, 4, 0, 5840, 4, 0, 6784, 4, 0, 6800, 4, 0, 7872, 9, 0, 7872, 9, 0, 8768, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267547200629157_301_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267547200629157_301_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a8546924 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267547200629157_301_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,170 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1792, 9, 0, 1792, 9, 0, 2688, 4, 0, 6208, 9, 0, 6208, 9, 0, 5952, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267547251287994_302_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267547251287994_302_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b387a140 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267547251287994_302_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,248 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; 
+ } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + 
} + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 5, 0, 1152, 5, 0, 1168, 5, 0, 1168, 5, 0, 1984, 5, 0, 1984, 5, 0, 2624, 8, 0, 4288, 2, 0, 5248, 2, 0, 10000, 1, 0, 10016, 1, 0, 10032, 1, 0, 10880, 4, 0, 11328, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267547360494288_304_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267547360494288_304_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..ff299cf5
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267547360494288_304_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,212 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // record stream: 3 uints per executed wave op
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // [0] holds the next free word of _participant_bit
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // every lane that executes a wave op appends (id << 6, ballot.x, ballot.y)
+  uint result = 0; // accumulates wave-op results; never stored to a buffer
+  switch ((WaveGetLaneIndex() % 4)) {
+    case 0: {
+      switch ((WaveGetLaneIndex() % 3)) {
+        case 0: {
+          if ((WaveGetLaneIndex() < 8)) {
+            result = (result + WaveActiveSum(1));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp); // reserve 3 words; temp receives the previous counter value
+            _participant_bit[temp] = (12 << 6); // word 0: id of this wave op, shifted left by 6
+            uint4 ballot = WaveActiveBallot(1); // bit mask of the lanes active at this point
+            _participant_bit[(temp + 1)] = ballot.x; // word 1: low 32 bits of the mask
+            _participant_bit[(temp + 2)] = ballot.y; // word 2: next 32 bits of the mask
+          }
+          break;
+        }
+        case 1: {
+          if (((WaveGetLaneIndex() % 2) == 0)) {
+            result = (result + WaveActiveSum(2));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (21 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 2: {
+          if (true) {
+            result = (result + WaveActiveSum(3));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (26 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() & 1) == 1)) {
+        if (((WaveGetLaneIndex() & 1) == 0)) { // opposite parity test nested in the one above: never true
+          result = (result + WaveActiveMin((WaveGetLaneIndex() + 4)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (42 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      } else { // not taken from this case: index % 4 == 1 implies an odd index
+        switch ((WaveGetLaneIndex() % 4)) {
+          case 0: {
+            if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
+              if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) {
+                result = (result + WaveActiveSum(result));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (63 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+            }
+            break;
+          }
+          case 1: {
+            if ((WaveGetLaneIndex() < 2)) {
+              if ((WaveGetLaneIndex() >= 2)) {
+                result = (result + WaveActiveMin(WaveGetLaneIndex()));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (73 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+            }
+            break;
+          }
+          case 2: {
+            if (true) {
+              result = (result + WaveActiveSum(3));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (78 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+          case 3: {
+            if ((WaveGetLaneIndex() < 20)) {
+              result = (result + WaveActiveSum(4));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (85 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+          default: {
+            result = (result + WaveActiveSum(99));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (89 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+            break;
+          }
+        }
+        if ((WaveGetLaneIndex() == 1)) {
+          result = (result + WaveActiveMin(8));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (96 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    case 2: {
+      if (true) {
+        result = (result + WaveActiveSum(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (101 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 3: {
+      if ((WaveGetLaneIndex() < 20)) {
+        result = (result + WaveActiveSum(4));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (108 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 9 # matches the 9 values listed in expected_bit_patterns
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [768, 1, 0, 6464, 4, 0, 6912, 8, 0] # 12<<6, 101<<6, 108<<6, each followed by its ballot mask words
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267547417043974_305_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267547417043974_305_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..edae82fd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267547417043974_305_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,222 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + } + } + break; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1152, 1, 0, 3408, 1, 0, 3424, 1, 0, 3984, 2, 0, 4000, 2, 0, 4752, 2, 0, 4768, 2, 0, 6480, 1, 0, 6496, 1, 0, 6912, 15, 0, 6912, 15, 0, 6912, 15, 0, 6912, 15, 0, 7552, 5, 0, 7552, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267547503668770_306_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267547503668770_306_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3c4901b5 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267547503668770_306_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,187 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 13, 0, 1088, 13, 0, 1088, 13, 0, 2256, 13, 0, 2256, 13, 0, 2256, 13, 0, 2272, 13, 0, 2272, 13, 0, 2272, 13, 0, 3920, 12, 0, 3920, 12, 0, 3936, 12, 0, 3936, 12, 0, 4880, 13, 0, 4880, 13, 0, 4880, 13, 0, 4896, 13, 0, 4896, 13, 0, 4896, 13, 0, 6480, 13, 0, 6480, 13, 0, 6480, 13, 0, 6496, 13, 0, 6496, 13, 0, 6496, 13, 0, 7168, 13, 0, 7168, 13, 0, 7168, 13, 0, 10816, 6, 0, 10816, 6, 0, 10560, 8, 0, 10304, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267547583217138_307_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267547583217138_307_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f79484c4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267547583217138_307_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,179 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (counter2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter2 == 1)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 1, 0, 1360, 1, 0, 1376, 1, 0, 4032, 1, 0, 4048, 1, 0, 4064, 1, 0, 5824, 2, 0, 6912, 2, 0, 6928, 2, 0, 6944, 2, 0, 9344, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267547659571404_308_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267547659571404_308_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..425f7b91 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267547659571404_308_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,117 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1152, 5, 0, 1152, 5, 0, 3968, 2, 0, 3984, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267547715926169_309_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267547715926169_309_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..120ce86b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267547715926169_309_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,211 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((99 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (((116 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4096, 1, 0, 3840, 8, 0, 3200, 4, 0, 2816, 2, 0, 7444, 8, 0, 7448, 8, 0, 7460, 8, 0, 7464, 8, 0, 11520, 13, 0, 11520, 13, 0, 11520, 13, 0, 10880, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267547793119412_310_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267547793119412_310_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..176afb8a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267547793119412_310_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,227 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 
+ Size: 51 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 1, 0, 1056, 1, 0, 1072, 1, 0, 1680, 1, 0, 1696, 1, 0, 1712, 1, 0, 2128, 1, 0, 2144, 1, 0, 2160, 1, 0, 3008, 4, 0, 3904, 5, 0, 3904, 5, 0, 4480, 5, 0, 4480, 5, 0, 5376, 5, 0, 5376, 5, 0, 7760, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267547899817020_311_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267547899817020_311_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e4328b4e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267547899817020_311_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,119 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + 
if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 2704, 2, 0, 2708, 2, 0, 2720, 2, 0, 2724, 2, 0, 3408, 2, 0, 3424, 2, 0, 3712, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267548695689080_313_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267548695689080_313_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fe880977 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267548695689080_313_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,342 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 
1)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((114 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((179 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((186 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((262 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((280 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (((291 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter6 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((303 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 1)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (313 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (362 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (356 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (350 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (346 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (342 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 81 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 4, 0, 2640, 1, 0, 3520, 4, 0, 4480, 10, 0, 4480, 10, 0, 6080, 10, 0, 6080, 10, 0, 6096, 10, 0, 6096, 10, 0, 7296, 8, 0, 7300, 8, 0, 7304, 8, 0, 7312, 8, 0, 7316, 8, 0, 7320, 8, 0, 8448, 2, 0, 8464, 2, 0, 12608, 1, 0, 12624, 1, 0, 12640, 1, 0, 16788, 4, 0, 16792, 4, 0, 20032, 8, 0, 23168, 9, 0, 23168, 9, 0, 22784, 2, 0, 22400, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267548988308115_314_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267548988308115_314_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..57c216ec --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267548988308115_314_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,246 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter0 << 4)); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(((197 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3328, 13, 0, 3328, 13, 0, 3328, 13, 0, 2944, 2, 0, 3968, 1, 0, 11152, 8, 0, 12624, 8, 0, 12628, 8, 0, 12632, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267549056992693_315_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267549056992693_315_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b8596609 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267549056992693_315_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,124 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 0) 
|| (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1488, 3, 0, 1488, 3, 0, 2640, 1, 0, 3856, 1, 0, 3860, 1, 0, 3864, 1, 0, 4560, 1, 0, 4564, 1, 0, 4568, 1, 0, 5264, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267549189921617_317_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267549189921617_317_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8e9f49e2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267549189921617_317_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,126 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 4, 0, 3088, 8, 0, 3104, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267549235963816_318_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267549235963816_318_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d4f9e6c4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267549235963816_318_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,132 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2048, 6, 0, 2048, 6, 0, 3792, 8, 0, 3796, 8, 0, 3800, 8, 0, 3808, 8, 0, 3812, 8, 0, 3816, 8, 0, 5008, 8, 0, 5024, 8, 0, 6080, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267549322761017_319_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267549322761017_319_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..36b295db --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267549322761017_319_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,250 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((178 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((188 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((197 << 6) | (i0 << 4)) | (counter1 << 
2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((202 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((209 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((216 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2752, 2, 0, 2496, 5, 0, 2496, 5, 0, 2112, 8, 0, 3392, 9, 0, 3392, 9, 0, 8384, 4, 0, 11396, 8, 0, 11400, 8, 0, 11404, 8, 0, 11412, 8, 0, 11416, 8, 0, 11420, 8, 0, 11428, 8, 0, 11432, 8, 0, 11436, 8, 0, 13380, 8, 0, 13384, 8, 0, 13388, 8, 0, 13396, 8, 0, 13400, 8, 0, 13404, 8, 0, 13412, 8, 0, 13416, 8, 0, 13420, 8, 0, 13828, 8, 0, 13832, 8, 0, 13836, 8, 0, 13844, 8, 0, 13848, 8, 0, 13852, 8, 0, 13860, 8, 0, 13864, 8, 0, 13868, 8, 0] + - Name: _wave_op_index + Format: UInt32 + 
Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267549651379099_320_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267549651379099_320_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..44cecf43 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267549651379099_320_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,178 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if 
(((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 6784, 12, 0, 6784, 12, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267549709169709_321_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267549709169709_321_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2c4b85b5 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267549709169709_321_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 1, 0, 832, 10, 0, 832, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - 
Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267549909108401_323_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267549909108401_323_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4d6c0e24 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267549909108401_323_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,181 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((93 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((97 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 10, 0, 976, 10, 0, 1792, 5, 0, 1792, 5, 0, 3968, 10, 0, 3968, 10, 0, 3972, 10, 0, 3972, 10, 0, 3984, 10, 0, 3984, 10, 0, 3988, 10, 0, 3988, 10, 0, 4000, 10, 0, 4000, 10, 0, 4004, 10, 0, 4004, 10, 0, 5952, 8, 0, 5956, 8, 0, 5968, 8, 0, 5972, 8, 0, 5984, 8, 0, 5988, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267550132647979_325_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267550132647979_325_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ca2f9891 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267550132647979_325_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,175 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((134 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 8, 0, 4352, 8, 0, 6272, 8, 0, 7892, 2, 0, 7908, 2, 0, 8596, 2, 0, 8612, 2, 0, 9488, 2, 0, 9504, 2, 0, 9792, 6, 0, 9792, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267550191811749_326_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267550191811749_326_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..81e4bbf6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267550191811749_326_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,164 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + 
Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 4608, 5, 0, 4608, 5, 0, 5248, 9, 0, 5248, 9, 0, 5824, 1, 0, 6144, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267550339791960_328_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267550339791960_328_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3068a8a5 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267550339791960_328_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,112 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3008, 8, 0, 2752, 2, 0, 2496, 1, 0, 4176, 5, 0, 4176, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267550531843867_331_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267550531843867_331_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3de1e3fd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267550531843867_331_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0] + - Name: _wave_op_index + Format: 
UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267550575678430_332_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267550575678430_332_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8bde34d8 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267550575678430_332_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,227 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((25 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3264, 2, 0, 5632, 2, 0, 6336, 4, 0, 6976, 4, 0, 8704, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267550638696693_333_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267550638696693_333_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..60178f42 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267550638696693_333_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,437 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() == 2)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint 
i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((280 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((302 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + break; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (315 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (325 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (332 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (337 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 3776, 4, 0, 4672, 5, 0, 4672, 5, 0, 5312, 8, 0, 13376, 9, 0, 13376, 9, 0, 21568, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267550854118015_335_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267550854118015_335_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3875e04e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267550854118015_335_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,496 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 2)) { 
+ case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((237 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((247 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((256 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((261 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((265 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((280 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((295 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((309 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((335 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((350 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((359 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((368 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((378 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((387 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((392 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((399 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 135 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 2192, 4, 0, 2208, 4, 0, 3284, 4, 0, 3288, 4, 0, 3300, 4, 0, 3304, 4, 0, 4564, 4, 0, 4568, 4, 0, 4580, 4, 0, 4584, 4, 0, 5136, 4, 0, 5152, 4, 0, 9280, 9, 0, 9280, 9, 0, 11328, 1, 0, 11648, 15, 0, 11648, 15, 0, 11648, 15, 0, 11648, 15, 0, 12288, 1, 0, 16724, 4, 0, 16728, 4, 0, 16732, 4, 0, 16740, 4, 0, 16744, 4, 0, 16748, 4, 0, 16756, 4, 0, 16760, 4, 0, 16764, 4, 0, 17940, 4, 0, 17944, 4, 0, 17948, 4, 0, 17956, 4, 0, 17960, 4, 0, 17964, 4, 0, 17972, 4, 0, 17976, 4, 0, 17980, 4, 0, 21440, 8, 0, 21456, 8, 0, 21472, 8, 0, 22400, 8, 0, 22416, 8, 0, 22432, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267551103154321_336_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267551103154321_336_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f1facd6b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267551103154321_336_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,294 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-word records, one per executing lane: [id word, ballot.x, ballot.y] */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] = next free word of _participant_bit; advanced by 3 per record */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* each guarded wave op is followed by a record of WaveActiveBallot(1) for the lanes that reached it */ + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((156 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((165 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((172 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (286 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (295 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 13, 0, 1088, 13, 0, 1088, 13, 0, 2752, 1, 0, 3392, 1, 0, 7760, 10, 0, 7760, 10, 0, 7776, 10, 0, 7776, 10, 0, 7792, 10, 0, 7792, 10, 0, 10004, 5, 0, 10004, 5, 0, 10020, 5, 0, 10020, 5, 0, 10036, 5, 0, 10036, 5, 0, 10580, 10, 0, 10580, 10, 0, 10596, 10, 0, 10596, 10, 0, 10612, 10, 0, 10612, 10, 0, 11028, 1, 0, 11044, 1, 0, 11060, 1, 0, 13440, 1, 0, 18304, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267551235658679_337_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267551235658679_337_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c581c3cb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267551235658679_337_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-word records, one per executing lane: [id word, ballot.x, ballot.y] */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] = next free word of _participant_bit; advanced by 3 per record */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* each guarded wave op is followed by a record of WaveActiveBallot(1) for the lanes that reached it */ + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter0 = 
0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 1, 0, 3008, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267551330790136_339_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267551330790136_339_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8b70c57e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267551330790136_339_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,212 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-word records, one per executing lane: [id word, ballot.x, ballot.y] */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] = next free word of _participant_bit; advanced by 3 per record */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* each guarded wave op is followed by a record of WaveActiveBallot(1) for the lanes that reached it */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 1)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((117 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + if ((WaveGetLaneIndex() == 0)) { + result 
= (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((173 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + 
Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 2368, 2, 0, 4544, 4, 0, 8832, 8, 0, 10128, 8, 0, 10144, 8, 0, 10160, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267551386028999_340_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267551386028999_340_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f884d155 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267551386028999_340_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,240 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-word records, one per executing lane: [id word, ballot.x, ballot.y] */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] = next free word of _participant_bit; advanced by 3 per record */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* each guarded wave op is followed by a record of WaveActiveBallot(1) for the lanes that reached it */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((33 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((147 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((158 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6720, 8, 0, 11648, 8, 0, 12672, 5, 0, 12672, 5, 0, 13504, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + 
Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267551495147820_341_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267551495147820_341_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..76e86cfe --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267551495147820_341_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,278 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } 
+ if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((249 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 8, 0, 912, 8, 0, 2176, 9, 0, 2176, 9, 0, 2192, 9, 0, 2192, 9, 0, 3520, 9, 0, 3520, 9, 0, 3536, 9, 0, 3536, 9, 0, 4096, 4, 0, 4112, 4, 0, 5440, 4, 0, 5456, 4, 0, 7296, 4, 0, 7312, 4, 0, 7744, 1, 0, 7760, 1, 0, 8768, 4, 0, 13568, 2, 0, 14992, 2, 0, 15008, 2, 0, 15024, 2, 0, 15952, 2, 0, 15968, 2, 0, 15984, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267551581871881_342_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267551581871881_342_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4181937d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267551581871881_342_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,149 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1536, 1, 0, 1552, 1, 0, 1568, 1, 0, 5312, 9, 0, 5312, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267551649248821_343_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267551649248821_343_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..650cf629 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267551649248821_343_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,202 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() >= 3)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1792, 5, 0, 1792, 5, 0, 2896, 8, 0, 2912, 8, 0, 2928, 8, 0, 4160, 5, 0, 4160, 5, 0, 5456, 1, 0, 5472, 1, 0, 5488, 1, 0, 6160, 1, 0, 6176, 1, 0, 6192, 1, 0, 6784, 1, 0, 7360, 1, 0, 7680, 7, 0, 7680, 7, 0, 7680, 7, 0, 8128, 15, 0, 8128, 15, 0, 8128, 15, 0, 
8128, 15, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267551739864843_344_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267551739864843_344_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ef5f95ac --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267551739864843_344_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,146 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + 
result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 4, 0, 2640, 8, 0, 2656, 8, 0, 4048, 8, 0, 4052, 8, 0, 4064, 8, 0, 4068, 8, 0, 4944, 8, 0, 4960, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267551813184383_345_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267551813184383_345_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f74d256c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267551813184383_345_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,154 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) 
== 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2560, 4, 0, 4032, 1, 0, 4928, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267551876779340_346_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267551876779340_346_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6f3a80ac --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267551876779340_346_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,283 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + 
Data: [1024, 1, 0, 6656, 2, 0, 8960, 4, 0, 10368, 8, 0, 10384, 8, 0, 10400, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267551931978054_347_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267551931978054_347_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7c633d53 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267551931978054_347_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,371 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || 
(WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 
0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((279 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 2)) { + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (308 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (315 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (329 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((344 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((356 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((376 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((385 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 2)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 
+ Data: [1280, 1, 0, 3332, 1, 0, 3336, 1, 0, 3348, 1, 0, 3352, 1, 0, 4032, 1, 0, 4048, 1, 0, 10624, 4, 0, 11264, 8, 0, 22016, 2, 0, 22032, 2, 0, 22048, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267552020143536_348_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267552020143536_348_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1acdfc2d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267552020143536_348_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,149 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((110 << 
6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((138 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + continue; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 9, 0, 1088, 9, 0, 3152, 8, 0, 3168, 8, 0, 3184, 8, 0, 4352, 8, 0, 5952, 6, 0, 5952, 6, 0, 5968, 6, 0, 5968, 6, 0, 5984, 6, 0, 5984, 6, 0, 7040, 2, 0, 7044, 2, 0, 7048, 2, 0, 7056, 2, 0, 7060, 2, 0, 7064, 2, 0, 7072, 2, 0, 7076, 2, 0, 7080, 2, 0, 8832, 4, 0, 8836, 4, 0, 8840, 4, 0, 8848, 4, 0, 8852, 4, 0, 8856, 4, 0, 8864, 4, 0, 8868, 4, 0, 8872, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267552158536343_349_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267552158536343_349_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b612e75d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267552158536343_349_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,104 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1152, 1, 0, 1472, 7, 0, 1472, 7, 0, 1472, 7, 0, 1920, 15, 0, 1920, 15, 0, 1920, 15, 0, 1920, 15, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267552204400178_350_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267552204400178_350_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7d26c76b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267552204400178_350_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,410 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((213 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (304 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (313 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((332 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((350 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((370 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((381 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (389 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 6848, 4, 0, 9088, 1, 0, 11664, 1, 0, 11680, 1, 0, 11696, 1, 0, 16768, 8, 0, 20032, 8, 0, 24896, 6, 0, 24896, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + 
Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267552464440056_352_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267552464440056_352_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0f8804be --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267552464440056_352_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,398 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((159 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((237 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((255 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((266 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((275 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((284 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter4 == 1)) { + break; + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((304 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((314 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((323 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((339 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((357 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((366 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((375 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (379 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 57 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 3, 0, 1600, 3, 0, 3348, 9, 0, 3348, 9, 0, 3364, 9, 0, 3364, 9, 0, 8704, 4, 0, 8720, 4, 0, 11904, 4, 0, 11920, 4, 0, 12544, 5, 0, 12544, 5, 0, 13760, 1, 0, 21712, 8, 0, 21728, 8, 0, 21744, 8, 0, 22864, 8, 0, 22880, 8, 0, 22896, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267552622156341_353_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267552622156341_353_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e4ca4ccb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267552622156341_353_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,250 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((92 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((99 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((168 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((181 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 3072, 1, 0, 3088, 1, 0, 3104, 1, 0, 4352, 1, 0, 4368, 1, 0, 4384, 1, 0, 6928, 2, 0, 6944, 2, 0, 10768, 8, 0, 10772, 8, 0, 10776, 8, 0, 10784, 8, 0, 10788, 8, 0, 10792, 8, 0, 11600, 8, 0, 11604, 8, 0, 11608, 8, 0, 11616, 8, 0, 11620, 8, 0, 11624, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267552759580207_354_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267552759580207_354_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7e622000 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267552759580207_354_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,447 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + 
} + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((268 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (327 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (338 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5840, 1, 0, 5856, 1, 0, 6480, 1, 0, 6496, 1, 0, 8528, 1, 0, 8544, 1, 0, 9408, 4, 0, 10176, 8, 0, 12544, 8, 0, 16192, 1, 0, 21632, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267552873889829_355_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267552873889829_355_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..1fa26644
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267552873889829_355_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,312 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0);
+RWStructuredBuffer<uint> _wave_op_index : register(u1);
+
+// Participation probe for wave ops, run as one group of 4 threads.
+// After each wave op, every active lane reserves 3 uints in _participant_bit
+// (InterlockedAdd on _wave_op_index[0]) and writes a record:
+//   [0] tag: (constant << 6), OR'd with loop counters at bits 4 and 2
+//   [1] WaveActiveBallot(1).x   [2] WaveActiveBallot(1).y
+// NOTE: `case 1` of the second switch has no break and falls through; the
+// expected data in pipeline.yaml records tag 245 with ballot 6 (lanes 1, 2).
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) {
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 3)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) {
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (9 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 1: {
+    if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) {
+      if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) {
+        result = (result + WaveActiveMax(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (27 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) {
+        if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) {
+          result = (result + WaveActiveMin((WaveGetLaneIndex() + 2)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (55 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (70 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
+        result = (result + WaveActiveMin((WaveGetLaneIndex() + 2)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (83 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  case 2: {
+    if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) {
+      uint counter0 = 0;
+      while ((counter0 < 2)) {
+        counter0 = (counter0 + 1);
+        if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) {
+          result = (result + WaveActiveMin(2));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((112 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) {
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((123 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) {
+        result = (result + WaveActiveMax(WaveGetLaneIndex()));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (138 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    } else {
+      if ((WaveGetLaneIndex() == 0)) {
+        result = (result + WaveActiveMin(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (145 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if (((WaveGetLaneIndex() & 1) == 1)) {
+        if (((WaveGetLaneIndex() & 1) == 1)) {
+          result = (result + WaveActiveSum(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (159 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() & 1) == 1)) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (168 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if ((WaveGetLaneIndex() == 3)) {
+        result = (result + WaveActiveMax(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (175 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  }
+  switch ((WaveGetLaneIndex() % 3)) {
+  case 0: {
+    switch ((WaveGetLaneIndex() % 3)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (188 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: {
+      for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) {
+        if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) {
+          result = (result + WaveActiveMin(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((211 << 6) | (i1 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) {
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((226 << 6) | (i1 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    case 2: {
+      if (true) {
+        result = (result + WaveActiveSum(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (231 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    }
+    break;
+  }
+  case 1: {
+    if (((WaveGetLaneIndex() % 2) == 0)) {
+      result = (result + WaveActiveSum(2));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (240 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+  case 2: {
+    if (true) {
+      result = (result + WaveActiveSum(3));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (245 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  default: {
+    result = (result + WaveActiveSum(99));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp);
+    _participant_bit[temp] = (249 << 6);
+    uint4 ballot = WaveActiveBallot(1);
+    _participant_bit[(temp + 1)] = ballot.x;
+    _participant_bit[(temp + 2)] = ballot.y;
+    break;
+  }
+  }
+  if ((WaveGetLaneIndex() == 2)) {
+    for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) {
+      for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) {
+        if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) {
+          if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) {
+            result = (result + WaveActiveMin(5));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((286 << 6) | (i2 << 4)) | (i3 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+      }
+      if ((i2 == 2)) {
+        break;
+      }
+    }
+    if ((WaveGetLaneIndex() == 0)) {
+      result = (result + WaveActiveMax(4));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (296 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 36
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [576, 9, 0, 576, 9, 0, 12032, 9, 0, 12032, 9, 0, 15680, 6, 0, 15680, 6, 0, 18304, 4, 0, 18308, 4, 0, 18320, 4, 0, 18324, 4, 0, 18336, 4, 0, 18340, 4, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267552952304559_356_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267552952304559_356_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..a953ecb8
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267552952304559_356_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,249 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0);
+RWStructuredBuffer<uint> _wave_op_index : register(u1);
+
+// Participation probe for wave ops, run as one group of 4 threads.
+// After each wave op, every active lane reserves 3 uints in _participant_bit
+// (InterlockedAdd on _wave_op_index[0]) and writes a record:
+//   [0] tag: (constant << 6), OR'd with a loop counter at bit 4 inside loops
+//   [1] WaveActiveBallot(1).x   [2] WaveActiveBallot(1).y
+// NOTE: several nested conditions can never hold together (e.g. lane == 2
+// tested inside lane == 0); those wave ops write no record.
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) {
+  uint result = 0;
+  if ((WaveGetLaneIndex() < 2)) {
+    uint counter0 = 0;
+    while ((counter0 < 3)) {
+      counter0 = (counter0 + 1);
+      if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((20 << 6) | (counter0 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if ((WaveGetLaneIndex() == 0)) {
+        if ((WaveGetLaneIndex() == 2)) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((30 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() & 1) == 0)) {
+          if (((WaveGetLaneIndex() & 1) == 1)) {
+            result = (result + WaveActiveMax(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((44 << 6) | (counter0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((WaveGetLaneIndex() == 0)) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((51 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      } else {
+        switch ((WaveGetLaneIndex() % 4)) {
+        case 0: {
+          if ((WaveGetLaneIndex() < 8)) {
+            result = (result + WaveActiveSum(1));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((61 << 6) | (counter0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 1: {
+          if (((WaveGetLaneIndex() % 2) == 0)) {
+            result = (result + WaveActiveSum(2));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((70 << 6) | (counter0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 2: {
+          if (true) {
+            result = (result + WaveActiveSum(3));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((75 << 6) | (counter0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 3: {
+          if ((WaveGetLaneIndex() < 20)) {
+            result = (result + WaveActiveSum(4));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((82 << 6) | (counter0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        }
+      }
+    }
+    if ((WaveGetLaneIndex() >= 2)) {
+      result = (result + WaveActiveSum(3));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (89 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+  switch ((WaveGetLaneIndex() % 2)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) {
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (99 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 1: {
+    switch ((WaveGetLaneIndex() % 3)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (109 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (118 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 2: {
+      uint counter1 = 0;
+      while ((counter1 < 2)) {
+        counter1 = (counter1 + 1);
+        if ((WaveGetLaneIndex() < 2)) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((132 << 6) | (counter1 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+          if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+            result = (result + WaveActiveMax((WaveGetLaneIndex() + 4)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((152 << 6) | (counter1 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((WaveGetLaneIndex() >= 3)) {
+          result = (result + WaveActiveMin(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((159 << 6) | (counter1 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    }
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 27
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1296, 1, 0, 1312, 1, 0, 1328, 1, 0, 3280, 1, 0, 3296, 1, 0, 3312, 1, 0, 6336, 5, 0, 6336, 5, 0, 6976, 8, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267553061258745_357_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267553061258745_357_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..5f312204
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267553061258745_357_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,415 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0);
+RWStructuredBuffer<uint> _wave_op_index : register(u1);
+
+// Participation probe for wave ops, run as one group of 4 threads.
+// After each wave op, every active lane reserves 3 uints in _participant_bit
+// (InterlockedAdd on _wave_op_index[0]) and writes a record:
+//   [0] tag: (constant << 6), OR'd with loop counters at bits 4 and 2
+//   [1] WaveActiveBallot(1).x   [2] WaveActiveBallot(1).y
+// NOTE: two case blocks in the first switch end without break (the nested
+// `case 0` and the outer `case 1`) and fall through into the next case.
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) {
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 3)) {
+  case 0: {
+    switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (12 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    case 1: {
+      for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) {
+        if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((31 << 6) | (i0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) {
+          if ((WaveGetLaneIndex() == 3)) {
+            result = (result + WaveActiveMax(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((46 << 6) | (i0 << 4)) | (i1 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((WaveGetLaneIndex() == 0)) {
+            result = (result + WaveActiveSum(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((53 << 6) | (i0 << 4)) | (i1 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+      }
+      break;
+    }
+    default: {
+      result = (result + WaveActiveSum(99));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (57 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+      break;
+    }
+    }
+    break;
+  }
+  case 1: {
+    if (((WaveGetLaneIndex() % 2) == 0)) {
+      result = (result + WaveActiveSum(2));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (66 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+  case 2: {
+    if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+      if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+        result = (result + WaveActiveSum(7));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (84 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      switch ((WaveGetLaneIndex() % 2)) {
+      case 0: {
+        if ((WaveGetLaneIndex() < 8)) {
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (94 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 1: {
+        if (((WaveGetLaneIndex() % 2) == 0)) {
+          result = (result + WaveActiveSum(2));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (103 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      default: {
+        result = (result + WaveActiveSum(99));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (107 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+        break;
+      }
+      }
+      if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (118 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    } else {
+      if ((WaveGetLaneIndex() == 0)) {
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (125 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) {
+        if ((WaveGetLaneIndex() < 1)) {
+          result = (result + WaveActiveMax((WaveGetLaneIndex() + 5)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((142 << 6) | (i2 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((WaveGetLaneIndex() == 2)) {
+          if ((WaveGetLaneIndex() == 3)) {
+            result = (result + WaveActiveMax(9));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((152 << 6) | (i2 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        } else {
+          if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) {
+            result = (result + WaveActiveMax(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((163 << 6) | (i2 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) {
+            result = (result + WaveActiveSum(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((174 << 6) | (i2 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((i2 == 1)) {
+          break;
+        }
+      }
+    }
+    break;
+  }
+  }
+  uint counter3 = 0;
+  while ((counter3 < 2)) {
+    counter3 = (counter3 + 1);
+    if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) {
+      if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) {
+        result = (result + WaveActiveMax(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((202 << 6) | (counter3 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    } else {
+      for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) {
+        if ((WaveGetLaneIndex() == 2)) {
+          result = (result + WaveActiveMax(3));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((217 << 6) | (counter3 << 4)) | (i4 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((WaveGetLaneIndex() == 2)) {
+          result = (result + WaveActiveMax(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((224 << 6) | (counter3 << 4)) | (i4 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+    }
+  }
+  switch ((WaveGetLaneIndex() % 2)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) {
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (234 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 1: {
+    uint counter5 = 0;
+    while ((counter5 < 2)) {
+      counter5 = (counter5 + 1);
+      if ((WaveGetLaneIndex() == 1)) {
+        result = (result + WaveActiveMax(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((248 << 6) | (counter5 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if ((WaveGetLaneIndex() == 0)) {
+        if ((WaveGetLaneIndex() == 0)) {
+          result = (result + WaveActiveMax((WaveGetLaneIndex() + 2)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((260 << 6) | (counter5 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        switch ((WaveGetLaneIndex() % 3)) {
+        case 0: {
+          if ((WaveGetLaneIndex() < 8)) {
+            result = (result + WaveActiveSum(1));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((270 << 6) | (counter5 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 1: {
+          if (((WaveGetLaneIndex() % 2) == 0)) {
+            result = (result + WaveActiveSum(2));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((279 << 6) | (counter5 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 2: {
+          if (true) {
+            result = (result + WaveActiveSum(3));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((284 << 6) | (counter5 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        default: {
+          result = (result + WaveActiveSum(99));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((288 << 6) | (counter5 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+          break;
+        }
+        }
+      } else {
+        if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+          result = (result + WaveActiveSum(3));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((299 << 6) | (counter5 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() & 1) == 0)) {
+          if (((WaveGetLaneIndex() & 1) == 0)) {
+            result = (result + WaveActiveSum(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((313 << 6) | (counter5 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+      }
+      if ((WaveGetLaneIndex() == 2)) {
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((320 << 6) | (counter5 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  default: {
+    result = (result + WaveActiveSum(99));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp);
+    _participant_bit[temp] = (324 << 6);
+    uint4 ballot = WaveActiveBallot(1);
+    _participant_bit[(temp + 1)] = ballot.x;
+    _participant_bit[(temp + 2)] = ballot.y;
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    
Stride: 4 + Fill: 0 + Size: 87 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 1984, 9, 0, 1984, 9, 0, 2000, 9, 0, 2000, 9, 0, 2944, 8, 0, 2948, 8, 0, 2952, 8, 0, 2960, 8, 0, 2964, 8, 0, 2968, 8, 0, 3392, 1, 0, 3396, 1, 0, 3400, 1, 0, 3408, 1, 0, 3412, 1, 0, 3416, 1, 0, 10432, 2, 0, 10448, 2, 0, 12944, 6, 0, 12944, 6, 0, 12960, 6, 0, 12960, 6, 0, 14976, 5, 0, 14976, 5, 0, 15888, 2, 0, 15904, 2, 0, 19152, 8, 0, 19168, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267553371928637_358_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267553371928637_358_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2d8bb4d9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267553371928637_358_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,182 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: 
[1088, 9, 0, 1088, 9, 0, 2176, 5, 0, 2176, 5, 0, 2192, 5, 0, 2192, 5, 0, 3648, 8, 0, 3664, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267553438179890_359_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267553438179890_359_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1d160f68 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267553438179890_359_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,176 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 3648, 1, 0, 4544, 4, 0, 6016, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267553610140146_361_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267553610140146_361_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..20820ab9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267553610140146_361_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,231 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- 
+Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2048, 13, 0, 2048, 13, 0, 2048, 13, 0, 2688, 1, 0, 5632, 4, 0, 6976, 8, 0, 10384, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267553680350057_362_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267553680350057_362_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d2de9801 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267553680350057_362_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,193 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((13 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((28 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { 
+ result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((77 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((88 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((98 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((105 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((116 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (counter0 
<< 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [848, 12, 0, 848, 12, 0, 864, 12, 0, 864, 12, 0, 1808, 4, 0, 1812, 4, 0, 1816, 4, 0, 1824, 4, 0, 1828, 4, 0, 1832, 4, 0, 2960, 9, 0, 2960, 9, 0, 2964, 9, 0, 2964, 9, 0, 2968, 9, 0, 2968, 9, 0, 2976, 9, 0, 2976, 9, 0, 2980, 9, 0, 2980, 9, 0, 2984, 9, 0, 2984, 9, 0, 3600, 1, 0, 3604, 1, 0, 3608, 1, 0, 3616, 1, 0, 3620, 1, 0, 3624, 1, 0, 4944, 8, 0, 4948, 8, 0, 4952, 8, 0, 4960, 8, 0, 4964, 8, 0, 4968, 8, 0, 5648, 4, 0, 5652, 4, 0, 5656, 4, 0, 5664, 4, 0, 5668, 4, 0, 5672, 4, 0, 7440, 4, 0, 7444, 4, 0, 7448, 4, 0, 7456, 4, 0, 7460, 4, 0, 7464, 4, 0, 7888, 1, 0, 7892, 1, 0, 7896, 1, 0, 7904, 1, 0, 7908, 1, 0, 7912, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267553850984333_363_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267553850984333_363_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f03aace1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267553850984333_363_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,231 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((129 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((139 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(((148 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((153 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((160 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((179 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if ((i1 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4160, 1, 0, 4176, 1, 0, 4192, 1, 0, 5120, 1, 0, 5136, 1, 0, 5152, 1, 0, 8256, 2, 0, 8260, 2, 0, 8264, 2, 0, 8272, 2, 0, 8276, 2, 0, 8280, 2, 0, 11456, 2, 0, 11460, 2, 0, 11464, 2, 0, 11472, 2, 0, 11476, 2, 0, 11480, 2, 0, 12160, 4, 0, 12608, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267553968579711_364_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267553968579711_364_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3de1e3fd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267553968579711_364_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + 
- Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267554018151664_365_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267554018151664_365_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b832a73f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267554018151664_365_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,232 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + 
+#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 5760, 4, 0, 6400, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267554068613909_366_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267554068613909_366_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d1bc60a9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267554068613909_366_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,179 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { 
+ if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3200, 6, 0, 3200, 6, 0, 2816, 1, 0, 2432, 8, 0, 3840, 9, 0, 3840, 9, 0, 4416, 1, 0, 4736, 4, 0, 5376, 5, 0, 5376, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267554123776499_367_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267554123776499_367_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4e47bbcc --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267554123776499_367_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,501 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(((112 << 6) | (i0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (i0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (((181 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 1)) { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((266 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((284 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((295 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (332 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((346 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((356 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((363 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((370 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter7 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (388 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (401 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (410 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (417 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (422 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (429 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (439 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (448 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (453 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 9, 0, 768, 9, 0, 3136, 2, 0, 3152, 2, 0, 3168, 2, 0, 7168, 2, 0, 7172, 2, 0, 7176, 2, 0, 7184, 2, 0, 7188, 2, 0, 7192, 2, 0, 7200, 2, 0, 7204, 2, 0, 7208, 2, 0, 7872, 2, 0, 7876, 2, 0, 7880, 2, 0, 7888, 2, 0, 7892, 2, 0, 7896, 2, 0, 7904, 2, 0, 7908, 2, 0, 7912, 2, 0, 8640, 2, 0, 8656, 2, 0, 8672, 2, 0, 11904, 4, 0, 21248, 2, 0, 24832, 2, 0, 27008, 4, 0, 27456, 8, 0, 28096, 9, 0, 28096, 9, 0, 28992, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267554476963890_368_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267554476963890_368_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5bcec41f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267554476963890_368_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,477 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((32 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + 
WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((68 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((97 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((104 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((305 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for 
(uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((326 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((337 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((346 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (358 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (363 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (370 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (380 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (389 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((404 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((426 << 6) | (i7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((433 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 2)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (443 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 147 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1168, 8, 0, 1184, 8, 0, 1200, 8, 0, 2512, 1, 0, 2528, 1, 0, 2544, 1, 0, 3456, 1, 0, 3472, 1, 0, 3488, 1, 0, 4356, 1, 0, 4360, 1, 0, 4372, 1, 0, 4376, 1, 0, 4388, 1, 0, 4392, 1, 0, 5508, 10, 0, 5508, 10, 0, 5512, 10, 0, 5512, 10, 0, 5524, 10, 0, 5524, 10, 0, 5528, 10, 0, 5528, 10, 0, 5540, 10, 0, 5540, 10, 0, 5544, 10, 0, 5544, 10, 0, 6212, 8, 0, 6216, 8, 0, 6228, 8, 0, 6232, 8, 0, 6244, 8, 0, 6248, 8, 0, 7104, 8, 0, 7120, 8, 0, 7136, 8, 0, 10880, 4, 0, 19520, 1, 0, 19536, 1, 0, 20864, 1, 0, 20868, 1, 0, 20872, 1, 0, 20880, 1, 0, 20884, 1, 0, 20888, 1, 0, 23232, 4, 0, 23680, 8, 0, 24320, 1, 0, 28352, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267555132434534_369_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267555132434534_369_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dfeb8055 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267555132434534_369_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,312 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || 
(WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter0 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 
6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((234 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((245 << 6) | 
(i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 9, 0, 768, 9, 0, 3088, 2, 0, 3104, 2, 0, 8656, 2, 0, 8672, 2, 0, 11776, 4, 0, 12736, 3, 0, 12736, 3, 0, 12752, 3, 0, 12752, 3, 0, 13376, 1, 0, 13392, 1, 0, 14980, 1, 0, 14984, 1, 0, 14988, 1, 0, 14996, 1, 0, 15000, 1, 0, 15004, 1, 0, 16128, 1, 0, 16144, 1, 0, 16576, 1, 0, 16592, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267555286428412_370_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267555286428412_370_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..40675c78 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267555286428412_370_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,397 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } 
+ case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((106 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((117 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((171 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 2)) { + break; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (294 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (311 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (325 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (332 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (336 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2448, 8, 0, 2464, 8, 0, 2480, 8, 0, 4288, 9, 0, 4288, 9, 0, 5648, 1, 0, 5664, 1, 0, 6804, 1, 0, 6808, 1, 0, 6820, 1, 0, 6824, 1, 0, 8208, 1, 0, 8224, 1, 0, 10944, 2, 0, 10948, 2, 0, 10952, 2, 0, 10960, 2, 0, 10964, 2, 0, 10968, 2, 0, 10976, 2, 0, 10980, 2, 0, 10984, 2, 0, 11840, 2, 0, 11856, 2, 0, 11872, 2, 0, 12160, 4, 0, 15680, 9, 0, 15680, 9, 0, 16512, 1, 0, 18816, 3, 0, 18816, 3, 0, 20800, 4, 0] + - Name: _wave_op_index + Format: UInt32 + 
Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267555700069059_372_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267555700069059_372_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1b3a1c05 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267555700069059_372_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,553 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (counter0 << 4)) | (i1 << 
2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((73 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: 
{ + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i3 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((226 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
(((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((249 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((272 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (282 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((313 << 6) | (counter7 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((323 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((339 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter8 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((351 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((366 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (373 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (383 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (392 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (405 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (412 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (423 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (432 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (442 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (449 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (460 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (471 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (476 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { 
+ if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (502 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i9 = 0; (i9 < 3); i9 = (i9 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((519 << 6) | (i9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i9 == 1)) { + continue; + } + } + } else { + uint counter10 = 0; + while ((counter10 < 3)) { + counter10 = (counter10 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((536 << 6) | (counter10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter10 == 2)) { + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 105 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 1, 0, 1312, 1, 0, 1328, 1, 0, 2448, 1, 0, 2452, 1, 0, 2464, 1, 0, 2468, 1, 0, 2480, 1, 0, 2484, 1, 0, 4688, 1, 0, 4692, 1, 0, 4704, 1, 0, 4708, 1, 0, 4720, 1, 0, 4724, 1, 0, 5392, 1, 0, 5408, 1, 0, 5424, 1, 0, 9664, 4, 0, 9680, 4, 0, 9696, 4, 0, 14464, 4, 0, 14468, 4, 0, 14480, 4, 0, 14484, 4, 0, 14496, 4, 0, 14500, 4, 0, 18048, 1, 0, 18624, 1, 0, 20048, 4, 0, 20064, 4, 0, 23872, 12, 0, 23872, 12, 0, 24512, 1, 0, 
30464, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267556227715749_373_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267556227715749_373_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..740db36c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267556227715749_373_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,207 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + 
result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if 
((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((131 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 
threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 4, 0, 3856, 1, 0, 3872, 1, 0, 3888, 1, 0, 5696, 4, 0, 6144, 2, 0, 7312, 8, 0, 7328, 8, 0, 9296, 8, 0, 9312, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267556391582952_375_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267556391582952_375_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..66bb4bbc --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267556391582952_375_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,85 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: 
{ + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267556437280456_376_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267556437280456_376_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d98420c9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267556437280456_376_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,598 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((101 << 6) | (counter1 << 4)) | (i2 << 
2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + 
result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((264 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((273 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((280 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((303 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + } + case 1: { + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((320 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((337 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((346 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((355 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 1)) { + break; + } + } + break; + } + case 2: { + for (uint i8 = 0; (i8 < 2); i8 = (i8 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((375 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter9 = 0; + while ((counter9 < 2)) { + counter9 = (counter9 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((391 << 6) | (i8 << 4)) | (counter9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((401 << 6) | (i8 << 4)) | (counter9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((410 << 6) | (i8 << 4)) | (counter9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((415 << 6) | (i8 << 4)) | (counter9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((419 << 6) | (i8 << 4)) | (counter9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((428 << 6) | (i8 << 4)) | (counter9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter9 == 1)) { + break; + } + } + } + } + case 3: { + for (uint i10 = 0; (i10 < 2); i10 = (i10 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((450 << 6) | (i10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((464 << 6) | (i10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((478 << 6) | (i10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((489 << 6) | (i10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i10 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 81 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 3712, 8, 0, 8832, 4, 0, 9472, 1, 0, 10368, 4, 0, 10816, 8, 0, 14080, 1, 0, 20480, 2, 0, 20496, 2, 0, 21568, 1, 0, 21584, 1, 0, 22720, 2, 0, 22736, 2, 0, 24000, 4, 0, 24016, 4, 0, 25028, 4, 0, 25044, 4, 0, 26564, 4, 0, 26580, 4, 0, 27396, 4, 0, 27412, 4, 0, 28800, 8, 0, 28816, 8, 0, 29696, 8, 0, 29712, 8, 0, 31296, 8, 0, 31312, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267556534003515_377_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267556534003515_377_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c7d84d56 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267556534003515_377_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,171 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + continue; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + 
WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3136, 8, 0, 6080, 9, 0, 6080, 9, 0, 6096, 9, 0, 6096, 9, 0, 6112, 9, 0, 6112, 9, 0, 7232, 9, 0, 7232, 9, 0, 7248, 9, 0, 7248, 9, 0, 7264, 9, 0, 7264, 9, 0, 9280, 4, 0, 9296, 4, 0, 9312, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267557126999293_379_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267557126999293_379_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fe16efee --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267557126999293_379_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,150 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { 
+ if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4160, 1, 0, 5056, 4, 0, 5504, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267557184542170_380_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267557184542170_380_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bcafe552 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267557184542170_380_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,151 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | 
(counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3664, 8, 0, 3680, 8, 0, 3696, 8, 0, 4624, 8, 0, 4628, 8, 0, 4640, 8, 0, 4644, 8, 0, 4656, 8, 0, 4660, 8, 0, 5072, 3, 0, 5072, 3, 0, 5076, 3, 0, 5076, 3, 0, 5088, 3, 0, 5088, 3, 0, 5092, 3, 0, 5092, 3, 0, 5104, 3, 0, 5104, 3, 0, 5108, 3, 0, 5108, 3, 0, 5840, 8, 0, 5856, 8, 0, 5872, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267557363496979_381_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267557363496979_381_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8cbc9a84 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267557363496979_381_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,125 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + 
result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267557414529962_382_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267557414529962_382_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..858a53ae --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267557414529962_382_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,207 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() 
== 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 6, 0, 1088, 6, 0, 5328, 2, 0, 5344, 2, 0, 6016, 4, 0, 9792, 4, 
0, 10240, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267557521614515_384_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267557521614515_384_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..29544b40 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267557521614515_384_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,140 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2192, 1, 0, 3344, 5, 0, 3344, 5, 0, 4048, 1, 0, 5584, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267557574887033_385_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267557574887033_385_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..17e62a8a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267557574887033_385_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,224 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = 
(result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 8576, 3, 0, 8576, 3, 0, 8320, 12, 0, 8320, 12, 0, 9984, 5, 0, 9984, 5, 0, 9600, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267557732200018_387_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267557732200018_387_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fb284603 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267557732200018_387_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,404 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + 
WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 0)) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((267 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((278 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((307 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (329 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1728, 8, 0, 4160, 2, 0, 7552, 6, 0, 7552, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267557872190438_389_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267557872190438_389_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..24d007df --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267557872190438_389_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,250 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 
2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 9, 0, 1600, 9, 0, 2240, 8, 0, 3392, 8, 0, 4928, 9, 0, 4928, 9, 0, 6592, 4, 0, 6608, 4, 0, 6624, 4, 0, 7808, 2, 0, 7824, 2, 0, 7840, 2, 0, 8640, 1, 0, 9536, 4, 0, 11216, 8, 0, 12176, 2, 
0, 13712, 10, 0, 13712, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267557959624013_390_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267557959624013_390_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..627a2114 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267557959624013_390_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,251 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((29 << 6) | (counter0 << 4)) | 
(counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((counter2 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 5, 0, 976, 5, 0, 992, 5, 0, 992, 5, 0, 1876, 4, 0, 1892, 4, 0, 4180, 1, 0, 
4196, 1, 0, 4628, 4, 0, 4644, 4, 0, 5392, 10, 0, 5392, 10, 0, 5408, 10, 0, 5408, 10, 0, 6480, 4, 0, 6496, 4, 0, 7760, 1, 0, 7776, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267558085401934_391_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267558085401934_391_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7f4eddec --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267558085401934_391_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,556 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (i1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = 
(result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((249 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((258 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((294 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((305 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (327 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (337 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (346 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (358 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (362 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (373 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (391 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((405 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (412 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (422 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((436 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i8 = 0; (i8 < 3); i8 = (i8 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((461 << 6) | (counter7 << 4)) | (i8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((468 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (475 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (486 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (514 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (529 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (540 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 51 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 9, 0, 1216, 9, 0, 3904, 9, 0, 3904, 9, 0, 12224, 4, 0, 12864, 1, 0, 27008, 8, 0, 29520, 8, 0, 29524, 8, 0, 29528, 8, 0, 29536, 8, 0, 29540, 8, 0, 29544, 8, 0, 29552, 8, 0, 29556, 8, 0, 29560, 8, 0, 30400, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - 
Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267558322701767_393_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267558322701767_393_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6947e01e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267558322701767_393_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,150 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + 
Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 3840, 2, 0, 3844, 2, 0, 3856, 2, 0, 3860, 2, 0, 5888, 4, 0, 6336, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267558385645295_394_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267558385645295_394_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7a2dbfaa --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267558385645295_394_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,239 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(((107 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((118 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((154 << 6) | (i0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= ((((174 << 6) | (i0 << 4)) | (counter2 << 2)) | counter3); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((185 << 6) | (i0 << 4)) | (counter2 << 2)) | counter3); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((196 << 6) | (i0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + 
Size: 315 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 9, 0, 1216, 9, 0, 2560, 5, 0, 2560, 5, 0, 3264, 5, 0, 3264, 5, 0, 5056, 9, 0, 5056, 9, 0, 5072, 9, 0, 5072, 9, 0, 5088, 9, 0, 5088, 9, 0, 5696, 8, 0, 5712, 8, 0, 5728, 8, 0, 6852, 8, 0, 6856, 8, 0, 6860, 8, 0, 6868, 8, 0, 6872, 8, 0, 6876, 8, 0, 6884, 8, 0, 6888, 8, 0, 6892, 8, 0, 7556, 8, 0, 7560, 8, 0, 7564, 8, 0, 7572, 8, 0, 7576, 8, 0, 7580, 8, 0, 7588, 8, 0, 7592, 8, 0, 7596, 8, 0, 8704, 1, 0, 8720, 1, 0, 8736, 1, 0, 9860, 5, 0, 9860, 5, 0, 9864, 5, 0, 9864, 5, 0, 9876, 5, 0, 9876, 5, 0, 9880, 5, 0, 9880, 5, 0, 9892, 5, 0, 9892, 5, 0, 9896, 5, 0, 9896, 5, 0, 11141, 1, 0, 11142, 1, 0, 11143, 1, 0, 11145, 1, 0, 11146, 1, 0, 11147, 1, 0, 11157, 1, 0, 11158, 1, 0, 11159, 1, 0, 11161, 1, 0, 11162, 1, 0, 11163, 1, 0, 11173, 1, 0, 11174, 1, 0, 11175, 1, 0, 11177, 1, 0, 11178, 1, 0, 11179, 1, 0, 11845, 1, 0, 11846, 1, 0, 11847, 1, 0, 11849, 1, 0, 11850, 1, 0, 11851, 1, 0, 11861, 1, 0, 11862, 1, 0, 11863, 1, 0, 11865, 1, 0, 11866, 1, 0, 11867, 1, 0, 11877, 1, 0, 11878, 1, 0, 11879, 1, 0, 11881, 1, 0, 11882, 1, 0, 11883, 1, 0, 12548, 6, 0, 12548, 6, 0, 12552, 6, 0, 12552, 6, 0, 12564, 6, 0, 12564, 6, 0, 12568, 6, 0, 12568, 6, 0, 12580, 6, 0, 12580, 6, 0, 12584, 6, 0, 12584, 6, 0, 13440, 1, 0, 13456, 1, 0, 13472, 1, 0, 14144, 9, 0, 14144, 9, 0, 14160, 9, 0, 14160, 9, 0, 14176, 9, 0, 14176, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267558523885502_395_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267558523885502_395_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..be407bb9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267558523885502_395_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,167 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + break; + } + case 2: 
{ + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2896, 8, 0, 2912, 8, 0, 3856, 8, 0, 3872, 8, 0, 7552, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267558687330401_397_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267558687330401_397_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f5edf33c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267558687330401_397_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,94 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((21 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1360, 1, 0, 1364, 1, 0, 1376, 1, 0, 1380, 1, 0, 2256, 10, 0, 2256, 10, 0, 2260, 10, 0, 2260, 10, 0, 2272, 10, 0, 2272, 10, 0, 2276, 10, 0, 2276, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267558768677661_398_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267558768677661_398_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c486ced5 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267558768677661_398_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,352 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((133 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { 
+ result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((counter1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((219 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((229 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((238 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((243 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((247 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 168 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4480, 1, 0, 5440, 9, 0, 5440, 9, 0, 6480, 2, 0, 10512, 4, 0, 12928, 5, 0, 12928, 5, 0, 12944, 5, 0, 12944, 5, 0, 12960, 5, 0, 12960, 5, 0, 14016, 4, 0, 14020, 4, 0, 14024, 4, 0, 14032, 4, 0, 14036, 4, 0, 14040, 4, 0, 14048, 4, 0, 14052, 4, 0, 14056, 4, 0, 14656, 1, 0, 14660, 1, 0, 14664, 1, 0, 14672, 1, 0, 14676, 1, 0, 14680, 1, 0, 14688, 1, 0, 14692, 1, 0, 14696, 1, 0, 15232, 1, 0, 15236, 1, 0, 15240, 1, 0, 15248, 1, 0, 15252, 1, 0, 15256, 1, 0, 15264, 1, 0, 15268, 1, 0, 15272, 1, 0, 15552, 5, 0, 15552, 5, 0, 15556, 5, 0, 15556, 5, 0, 15560, 5, 0, 15560, 5, 0, 15568, 5, 0, 15568, 5, 0, 15572, 5, 0, 15572, 5, 0, 15576, 5, 0, 15576, 5, 0, 15584, 5, 0, 15584, 5, 0, 15588, 5, 0, 15588, 5, 0, 15592, 5, 0, 15592, 5, 
0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267559032677001_399_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267559032677001_399_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e278a9ca --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267559032677001_399_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,264 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((95 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((102 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() >= 2)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 8, 0, 4288, 2, 0, 6996, 2, 0, 7000, 2, 0, 7004, 2, 0, 7012, 2, 0, 7016, 2, 0, 7020, 2, 0, 7028, 2, 0, 7032, 2, 0, 7036, 2, 0, 12992, 1, 0, 16128, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267559385365590_400_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267559385365590_400_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..012bc9eb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267559385365590_400_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,141 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 9, 0, 1088, 9, 0, 1728, 1, 0, 4288, 4, 0, 5888, 13, 0, 5888, 13, 0, 5888, 13, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267559436624680_401_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267559436624680_401_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f7613735 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267559436624680_401_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,248 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((178 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((187 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4608, 8, 0, 10368, 4, 0, 10384, 4, 0, 10400, 4, 0, 11972, 4, 0, 11976, 4, 0, 11980, 4, 0, 11988, 4, 0, 11992, 4, 0, 11996, 4, 0, 12004, 4, 0, 12008, 4, 0, 12012, 4, 0, 12672, 4, 0, 12688, 4, 0, 12704, 4, 0, 13248, 10, 0, 13248, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267559515989078_402_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267559515989078_402_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6cb64dca --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267559515989078_402_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,178 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1472, 10, 0, 1472, 10, 0, 2432, 8, 0, 5376, 5, 0, 5376, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267559567052257_403_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267559567052257_403_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..21475ea5 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267559567052257_403_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,218 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((104 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((114 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((128 << 6) | (counter2 << 4)) | (i3 << 2)) | counter4); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((150 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((counter2 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 2048, 2, 0, 2064, 2, 0, 2080, 2, 0, 4992, 4, 0, 6672, 9, 0, 6672, 9, 0, 6676, 9, 0, 6676, 9, 0, 6688, 9, 0, 6688, 9, 0, 
6692, 9, 0, 6692, 9, 0, 7312, 9, 0, 7312, 9, 0, 7316, 9, 0, 7316, 9, 0, 7328, 9, 0, 7328, 9, 0, 7332, 9, 0, 7332, 9, 0, 8209, 2, 0, 8210, 2, 0, 8213, 2, 0, 8214, 2, 0, 8225, 2, 0, 8226, 2, 0, 8229, 2, 0, 8230, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267559940407636_404_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267559940407636_404_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8067c405 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267559940407636_404_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,187 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if 
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2384, 1, 0, 2400, 1, 0, 5632, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267560066746918_406_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267560066746918_406_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..05255715 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267560066746918_406_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,129 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1472, 8, 0, 1488, 8, 0, 1504, 8, 0, 3904, 6, 0, 3904, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267560123258217_407_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267560123258217_407_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..68e0eac3 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267560123258217_407_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,296 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 3)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 
2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2704, 8, 0, 2720, 8, 0, 2736, 8, 0, 5248, 2, 0, 5824, 2, 0, 6656, 9, 0, 6656, 9, 0, 16576, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267560379035014_410_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267560379035014_410_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6a2b5598 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267560379035014_410_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,136 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1408, 6, 0, 1408, 6, 0, 1424, 6, 0, 1424, 6, 0, 1440, 6, 0, 1440, 6, 0, 3840, 2, 0, 3856, 2, 0, 3872, 2, 0, 4928, 9, 0, 4928, 9, 0, 4944, 9, 0, 4944, 9, 0, 4960, 9, 0, 4960, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267560438416964_411_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267560438416964_411_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..188d5adf --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267560438416964_411_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,273 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((95 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((110 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((198 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((205 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((216 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((223 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 171 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 2, 0, 11536, 12, 0, 11536, 12, 0, 11552, 12, 0, 11552, 12, 0, 11568, 12, 0, 11568, 12, 0, 12688, 3, 0, 12688, 3, 0, 12692, 3, 0, 12692, 3, 0, 12704, 3, 0, 12704, 3, 0, 12708, 3, 0, 12708, 3, 0, 12720, 3, 0, 12720, 3, 0, 12724, 3, 0, 12724, 3, 0, 13136, 3, 0, 13136, 3, 0, 13140, 3, 0, 13140, 3, 0, 13152, 3, 0, 13152, 3, 0, 13156, 3, 0, 13156, 3, 0, 13168, 3, 0, 13168, 3, 0, 13172, 3, 0, 13172, 3, 0, 13840, 12, 0, 13840, 12, 0, 13844, 12, 0, 13844, 12, 0, 13856, 12, 0, 13856, 12, 0, 13860, 12, 0, 13860, 12, 0, 13872, 12, 0, 13872, 12, 0, 13876, 12, 0, 13876, 12, 0, 14288, 3, 0, 14288, 3, 0, 14292, 3, 0, 14292, 3, 0, 14304, 3, 0, 14304, 3, 0, 14308, 3, 0, 14308, 3, 0, 14320, 3, 0, 14320, 3, 0, 14324, 3, 0, 14324, 3, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267560770902524_413_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267560770902524_413_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2f808bd6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267560770902524_413_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,393 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = 
(result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 3)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + 
for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((251 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((298 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((312 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = 
(result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((321 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((328 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((343 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (348 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2176, 8, 0, 2192, 8, 0, 3076, 8, 0, 3080, 8, 0, 3092, 8, 0, 3096, 8, 0, 3520, 8, 0, 3536, 8, 0, 5952, 8, 0, 5968, 8, 0, 5984, 8, 0, 9984, 4, 0, 14208, 9, 0, 14208, 9, 0, 19088, 2, 0, 19104, 2, 0, 19120, 2, 0, 20560, 2, 0, 20576, 2, 0, 20592, 2, 0, 22272, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: 
[0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267560945191885_414_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267560945191885_414_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e893d494 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267560945191885_414_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,156 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 51 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 1, 0, 1232, 1, 0, 2432, 1, 0, 2436, 1, 0, 2448, 1, 0, 2452, 1, 0, 3264, 1, 0, 3268, 1, 0, 3280, 1, 0, 3284, 1, 0, 4352, 6, 0, 4352, 6, 0, 4800, 14, 0, 4800, 14, 0, 4800, 14, 0, 5440, 5, 0, 5440, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267561012016634_415_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267561012016634_415_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dadff40a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267561012016634_415_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,149 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() 
< 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5376, 1, 0, 
5696, 4, 0, 6144, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267561057838447_416_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267561057838447_416_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..609746da --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267561057838447_416_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,161 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || 
(WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 57 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 10, 0, 832, 10, 0, 1936, 4, 0, 1952, 4, 0, 1968, 4, 0, 2896, 1, 0, 2900, 1, 0, 2904, 1, 0, 2912, 1, 0, 2916, 1, 0, 2920, 1, 0, 2928, 1, 0, 2932, 1, 0, 2936, 1, 0, 3344, 1, 0, 3360, 1, 0, 3376, 1, 0, 4352, 10, 0, 4352, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267561144660103_417_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267561144660103_417_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dfb3b3f0 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267561144660103_417_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,245 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter2 == 1)) { + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5248, 8, 0, 5264, 8, 0, 6596, 10, 0, 6596, 10, 0, 6612, 10, 0, 6612, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267561228888004_418_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267561228888004_418_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f2444ad4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267561228888004_418_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,481 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Wave-participation probe. Each lane that reaches a wave-op site below adds the op's result to the local 'result', uses the pre-add value of _wave_op_index[0] returned in 'temp' by InterlockedAdd(..., 3, temp) as its record offset, and writes three words to _participant_bit: an ID word (a site constant << 6, OR'd with shifted loop counters at sites inside loops), then ballot.x and ballot.y of WaveActiveBallot(1). 'result' is not stored to any buffer in this function. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { /* selected when (lane % 2) == 1, so the (lane == 0) test below is false for these lanes and the matching else branch is the one that runs */ + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = ((234 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((262 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((298 << 6) | (counter2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((319 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } /* this case 0 block has no break before the next case label */ + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((328 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((333 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((348 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((355 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((372 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 2)) { + break; + } + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((385 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (398 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (407 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (411 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] 
# Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 6912, 10, 0, 6912, 10, 0, 10944, 10, 0, 10944, 10, 0, 11840, 9, 0, 11840, 9, 0, 17984, 4, 0, 19088, 4, 0, 19104, 4, 0, 21328, 4, 0, 21344, 4, 0, 22736, 1, 0, 22752, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267561300512692_419_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267561300512692_419_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3de1e3fd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267561300512692_419_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Wave-participation probe. Each lane that reaches a wave-op site below adds the op's result to the local 'result', uses the pre-add value of _wave_op_index[0] returned in 'temp' by InterlockedAdd(..., 3, temp) as its record offset, and writes three words to _participant_bit: an ID word (a site constant << 6), then ballot.x and ballot.y of WaveActiveBallot(1). 'result' is not stored to any buffer in this function. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; 
+ } + case 1: { /* selected when (lane % 2) == 1, so the ((lane % 2) == 0) test below does not pass for these lanes */ + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267561345028242_420_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267561345028242_420_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..38dd759c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267561345028242_420_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,143 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Wave-participation probe. Each lane that reaches a wave-op site below adds the op's result to the local 'result', uses the pre-add value of _wave_op_index[0] returned in 'temp' by InterlockedAdd(..., 3, temp) as its record offset, and writes three words to _participant_bit: an ID word (a site constant << 6), then ballot.x and ballot.y of WaveActiveBallot(1). 'result' is not stored to any buffer in this function. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } /* this case 1 block has no break before the next case label */ + case 2: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 4, 0, 2112, 9, 0, 2112, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267561393188619_421_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267561393188619_421_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..26791b26 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267561393188619_421_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,155 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Wave-participation probe. Each lane that reaches a wave-op site below adds the op's result to the local 'result', uses the pre-add value of _wave_op_index[0] returned in 'temp' by InterlockedAdd(..., 3, temp) as its record offset, and writes three words to _participant_bit: an ID word (a site constant << 6, OR'd with (i0 << 4) at sites inside the for loop), then ballot.x and ballot.y of WaveActiveBallot(1). 'result' is not stored to any buffer in this function. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { /* the nested ((lane & 1) == 1) test directly below contradicts this condition, so that inner block is not entered */ + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((124 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 87 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2368, 5, 0, 2368, 5, 0, 2384, 5, 0, 2384, 5, 0, 2400, 5, 0, 2400, 5, 0, 4416, 5, 0, 4416, 5, 0, 4432, 5, 0, 4432, 5, 0, 4448, 5, 0, 4448, 5, 0, 5120, 1, 0, 5136, 1, 0, 5152, 1, 0, 5696, 5, 0, 5696, 5, 0, 5712, 5, 0, 5712, 5, 0, 5728, 5, 0, 5728, 5, 0, 7936, 5, 0, 7936, 5, 0, 7952, 5, 0, 7952, 5, 0, 7968, 5, 0, 7968, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267561504577225_422_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267561504577225_422_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e575f52b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267561504577225_422_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,113 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Wave-participation probe. Each lane that reaches a wave-op site below adds the op's result to the local 'result', uses the pre-add value of _wave_op_index[0] returned in 'temp' by InterlockedAdd(..., 3, temp) as its record offset, and writes three words to _participant_bit: an ID word (a site constant << 6, OR'd with (counter0 << 4) at the site inside the while loop), then ballot.x and ballot.y of WaveActiveBallot(1). 'result' is not stored to any buffer in this function. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + } 
+ break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 3264, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267561618841131_424_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267561618841131_424_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5ffe324b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267561618841131_424_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,204 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Wave-participation probe. Each lane that reaches a wave-op site below adds the op's result to the local 'result', uses the pre-add value of _wave_op_index[0] returned in 'temp' by InterlockedAdd(..., 3, temp) as its record offset, and writes three words to _participant_bit: an ID word (a site constant << 6, OR'd with the enclosing loop counter << 4 and, in the nested loop, the inner counter << 2), then ballot.x and ballot.y of WaveActiveBallot(1). 'result' is not stored to any buffer in this function. */ + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((99 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((116 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (counter1 << 4)) | (counter2 << 2)); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((130 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + 
Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1808, 6, 0, 1808, 6, 0, 1824, 6, 0, 1824, 6, 0, 1840, 6, 0, 1840, 6, 0, 3472, 4, 0, 3488, 4, 0, 3504, 4, 0, 4432, 4, 0, 4448, 4, 0, 4464, 4, 0, 5328, 1, 0, 5344, 1, 0, 5360, 1, 0, 6356, 8, 0, 6360, 8, 0, 6364, 8, 0, 6372, 8, 0, 6376, 8, 0, 6380, 8, 0, 6388, 8, 0, 6392, 8, 0, 6396, 8, 0, 8340, 8, 0, 8344, 8, 0, 8348, 8, 0, 8356, 8, 0, 8360, 8, 0, 8364, 8, 0, 8372, 8, 0, 8376, 8, 0, 8380, 8, 0, 8784, 3, 0, 8784, 3, 0, 8800, 3, 0, 8800, 3, 0, 8816, 3, 0, 8816, 3, 0, 9408, 9, 0, 9408, 9, 0, 10304, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267562399216801_425_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267562399216801_425_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a56576cb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267562399216801_425_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,274 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 14, 0, 
1856, 14, 0, 1856, 14, 0, 2496, 1, 0, 7040, 4, 0, 7056, 4, 0, 12480, 1, 0, 13376, 6, 0, 13376, 6, 0, 13824, 14, 0, 13824, 14, 0, 13824, 14, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267562477915447_426_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267562477915447_426_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a10f1fa1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267562477915447_426_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,140 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1104, 13, 0, 1104, 13, 0, 1104, 13, 0, 1120, 13, 0, 1120, 13, 0, 1120, 13, 0, 2256, 13, 0, 2256, 13, 0, 2256, 13, 0, 2272, 13, 0, 2272, 13, 0, 2272, 13, 0, 2896, 9, 0, 2896, 9, 0, 2912, 9, 0, 2912, 9, 0, 3792, 4, 0, 3808, 4, 0, 4496, 13, 0, 4496, 13, 0, 4496, 13, 0, 4512, 13, 0, 4512, 13, 0, 4512, 13, 0, 5200, 9, 0, 5200, 9, 0, 5216, 9, 0, 5216, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267562654494023_428_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267562654494023_428_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..88160e29 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267562654494023_428_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,301 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (i0 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((237 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((247 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((256 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 4, 0, 1920, 4, 0, 3200, 4, 0, 3648, 1, 0, 5760, 1, 0, 6336, 5, 0, 6336, 5, 0, 7680, 9, 0, 7680, 9, 0, 8896, 9, 0, 8896, 9, 0, 8912, 9, 0, 8912, 9, 0, 8928, 9, 0, 8928, 9, 0, 10048, 9, 0, 10048, 9, 0, 10064, 9, 0, 10064, 9, 0, 10080, 9, 0, 10080, 9, 0, 12288, 9, 0, 12288, 9, 0, 12304, 9, 0, 12304, 9, 0, 12320, 9, 0, 12320, 9, 0, 13184, 9, 0, 13184, 9, 0, 14096, 2, 0, 14112, 2, 0, 15184, 2, 0, 15188, 2, 0, 15200, 2, 0, 15204, 2, 0, 17152, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + 
VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267562793938505_429_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267562793938505_429_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cb8b77e6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267562793938505_429_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,113 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2560, 9, 0, 2560, 9, 0, 3264, 9, 0, 3264, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267562854674349_430_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267562854674349_430_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..77b15870 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267562854674349_430_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,164 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = ((145 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 4, 0, 5312, 1, 0, 6016, 1, 0, 6592, 5, 0, 6592, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267562973633906_432_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267562973633906_432_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e11fa277 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267562973633906_432_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,192 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Output log: each lane that executes a wave op appends one 3-uint record (packed op id, ballot.x, ballot.y). */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the next free index into _participant_bit. */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Runs wave intrinsics under divergent control flow and logs the active lanes for each; 'result' is only accumulated, never stored to a buffer. */ + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* reserves 3 slots; temp receives the index before the add */ + _participant_bit[temp] = (13 << 6); /* op id in bits 6 and up */ + uint4 ballot = WaveActiveBallot(1); /* mask of the lanes active here */ + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result +
WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } +
for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (i0 << 4)); /* loop counter OR-ed in at bit 4 */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((153 << 6) | (i0 << 4)) | (counter1 << 2)); /* outer counter at bit 4, inner counter at bit 2 */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((168 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 63 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 10, 0, 832, 10, 0, 6528, 4, 0, 6144, 9, 0, 6144, 9, 0, 5504, 2, 0, 8256, 4, 0, 8272, 4, 0,
9796, 4, 0, 9800, 4, 0, 9804, 4, 0, 9812, 4, 0, 9816, 4, 0, 9820, 4, 0, 10756, 8, 0, 10760, 8, 0, 10764, 8, 0, 10772, 8, 0, 10776, 8, 0, 10780, 8, 0, 11200, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267563273655253_435_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267563273655253_435_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9d53bffc --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267563273655253_435_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,150 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Output log: each lane that executes a wave op appends one 3-uint record (packed op id, ballot.x, ballot.y). */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the next free index into _participant_bit. */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Runs wave intrinsics under divergent control flow and logs the active lanes for each; 'result' is only accumulated, never stored to a buffer. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* reserves 3 slots; temp receives the index before the add */ + _participant_bit[temp] = (9 << 6); /* op id in bits 6 and up */ + uint4 ballot = WaveActiveBallot(1); /* mask of the lanes active here */ + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp]
= (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); /* loop counter OR-ed in at bit 4 */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp);
+ _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2368, 8, 0, 2384, 8, 0, 6144, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267563323853714_436_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267563323853714_436_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..09de544f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267563323853714_436_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,170 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Output log: each lane that executes a wave op appends one 3-uint record (packed op id, ballot.x, ballot.y). */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the next free index into _participant_bit. */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Runs wave intrinsics under divergent control flow and logs the active lanes for each; 'result' is only accumulated, never stored to a buffer. */ + uint result = 0; + if ((WaveGetLaneIndex() >= 2)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* reserves 3 slots; temp receives the index before the add */ + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); /* op id in bits 6 and up, loop counter OR-ed in at bit 4 */ + uint4 ballot = WaveActiveBallot(1); /* mask of the lanes active here */ + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (i0 << 4)) | (counter1 << 2)); /* outer counter at bit 4, inner counter at bit 2 */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot =
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; /* leaves the while loop after its first iteration */ + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2304, 4, 0, 2320, 4, 0, 2336, 4, 0, 3332, 8, 0, 3348, 8, 0, 3364, 8, 0, 3908, 4, 0, 3924, 4, 0, 3940, 4, 0, 5056, 8, 0, 5072, 8, 0, 5088, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267563395759298_437_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267563395759298_437_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..751f1a94 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267563395759298_437_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,274 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Output log: each lane that executes a wave op appends one 3-uint record (packed op id, ballot.x, ballot.y). */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the next free index into _participant_bit. */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Runs wave intrinsics under divergent control flow and logs the active lanes for each; 'result' is only accumulated, never stored to a buffer. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* reserves 3 slots; temp receives the index before the add */ + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); /* op id in bits 6 and up, loop counter OR-ed in at bit 4 */ + uint4 ballot = WaveActiveBallot(1); /* mask of the lanes active here */ + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result +
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } /* case 2 has no break: control falls through into case 3 */ + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((141 << 6) | (i1 << 4)) | (i2 << 2)); /* outer counter at bit 4, inner counter at bit 2 */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((148 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + continue; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) ||
(WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; /* leaves the while loop after its first iteration */ + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 1, 0, 1232, 1, 0, 4864, 1, 0, 4880, 1, 0, 6976, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule:
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267563464936928_438_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267563464936928_438_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..93666c52 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267563464936928_438_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,162 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Output log: each lane that executes a wave op appends one 3-uint record (packed op id, ballot.x, ballot.y). */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the next free index into _participant_bit. */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Runs wave intrinsics under divergent control flow and logs the active lanes for each; 'result' is only accumulated, never stored to a buffer. */ + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); /* incremented before use, so logged counter values start at 1 */ + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* reserves 3 slots; temp receives the index before the add */ + _participant_bit[temp] = ((17 << 6) | (counter0 << 4)); /* op id in bits 6 and up, loop counter OR-ed in at bit 4 */ + uint4 ballot = WaveActiveBallot(1); /* mask of the lanes active here */ + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { +
if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1104, 9, 0, 1104, 9, 0, 1120, 9, 0, 1120, 9, 0, 1136, 9, 0, 1136, 9, 0, 1808, 13, 0, 1808, 13, 0, 1808, 13, 0, 1824, 13, 0, 1824, 13, 0, 1824, 13, 0, 1840, 13, 0, 1840, 13, 0, 1840, 13, 0, 4816, 4, 0, 4832, 4, 0, 5712, 1, 0, 5728, 1, 0, 6160, 1, 0, 6176, 1, 0, 6720, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267563540892378_439_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267563540892378_439_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5671ed3a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267563540892378_439_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,108 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Output log: each lane that executes a wave op appends one 3-uint record (packed op id, ballot.x, ballot.y). */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the next free index into _participant_bit. */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Runs wave intrinsics under divergent control flow and logs the active lanes for each; 'result' is only accumulated, never stored to a buffer. */ + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); /* reserves 3 slots; temp receives the index before the add */ + _participant_bit[temp] = (25 << 6); /* op id in bits 6 and up */ + uint4 ballot = WaveActiveBallot(1); /* mask of the lanes active here */ + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); /* loop counter OR-ed in at bit 4 */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; +
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267563594933186_440_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267563594933186_440_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..56ef1a7c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267563594933186_440_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,373 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } 
+ case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((283 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((317 << 6) | (counter2 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((326 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 2704, 2, 0, 4288, 4, 0, 9216, 4, 0, 9792, 5, 0, 9792, 5, 0, 10432, 9, 0, 10432, 9, 0, 16464, 4, 0, 16480, 4, 0, 16496, 4, 0, 18128, 4, 0, 18144, 4, 0, 18160, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267563667218207_441_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267563667218207_441_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e05cdaf4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267563667218207_441_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,271 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } 
+ break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 3328, 1, 0, 11776, 4, 0, 13312, 4, 0, 14016, 4, 0, 14464, 14, 0, 14464, 14, 0, 14464, 14, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + 
Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267563733338214_442_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267563733338214_442_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4066bb22 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267563733338214_442_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,131 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { 
+ result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i0 == 1)) { + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 63 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 13, 0, 1280, 13, 0, 1280, 13, 0, 1296, 13, 0, 1296, 13, 0, 1296, 13, 0, 3200, 1, 0, 3204, 1, 0, 3216, 1, 0, 3220, 1, 0, 4160, 9, 0, 4160, 9, 0, 4164, 9, 0, 4164, 9, 0, 4176, 9, 0, 4176, 9, 0, 4180, 9, 0, 4180, 9, 0, 4992, 9, 0, 4992, 9, 0, 5888, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267563822111652_443_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267563822111652_443_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..eccef151 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267563822111652_443_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + 
WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2304, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267564082976409_445_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267564082976409_445_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..52e490ad --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267564082976409_445_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,171 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + 
break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 1, 0, 1360, 1, 0, 1376, 1, 0, 2244, 1, 0, 2260, 1, 0, 2276, 1, 0, 4480, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267564150529947_446_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267564150529947_446_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2eca306b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267564150529947_446_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,336 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((i1 == 2)) { + break; + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((134 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((158 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((189 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((288 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (308 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 9, 0, 1296, 9, 0, 2384, 1, 0, 2388, 1, 0, 2392, 1, 0, 3024, 1, 0, 3028, 1, 0, 3032, 1, 0, 8576, 1, 0, 8580, 1, 0, 8592, 1, 0, 8596, 1, 0, 8608, 1, 0, 8612, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + 
Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267564508990710_447_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267564508990710_447_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5d7cf582 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267564508990710_447_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,169 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2768, 1, 0, 2784, 1, 0, 2800, 1, 0, 4816, 4, 0, 4832, 4, 0, 4848, 4, 0, 5520, 5, 0, 5520, 5, 0, 5536, 5, 0, 5536, 5, 0, 5552, 5, 0, 5552, 5, 0, 6464, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 
1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267564578300692_448_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267564578300692_448_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b06cc300 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267564578300692_448_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,241 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (i0 << 4)) | (i2 << 2)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 3780, 2, 0, 3796, 2, 0, 4484, 2, 0, 4500, 2, 0, 7360, 2, 0, 7376, 2, 0, 8448, 1, 0, 9408, 4, 0, 9424, 4, 0, 9440, 4, 0, 10048, 5, 0, 10048, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267564664457498_449_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267564664457498_449_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..68f1c4d3 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267564664457498_449_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,324 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = 
(result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((188 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((199 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((247 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((267 << 6) | 
(counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((289 << 6) | (counter3 << 4)) | (i4 << 2)) | counter5); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((298 << 6) | (counter3 << 4)) | (i4 << 2)) | counter5); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((305 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((316 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + 
Data: [1792, 1, 0, 2432, 1, 0, 8320, 1, 0, 13952, 6, 0, 13952, 6, 0, 14400, 8, 0, 15824, 9, 0, 15824, 9, 0, 15840, 9, 0, 15840, 9, 0, 17104, 13, 0, 17104, 13, 0, 17104, 13, 0, 17120, 13, 0, 17120, 13, 0, 17120, 13, 0, 18513, 12, 0, 18513, 12, 0, 18514, 12, 0, 18514, 12, 0, 18515, 12, 0, 18515, 12, 0, 18517, 12, 0, 18517, 12, 0, 18518, 12, 0, 18518, 12, 0, 18519, 12, 0, 18519, 12, 0, 18529, 12, 0, 18529, 12, 0, 18530, 12, 0, 18530, 12, 0, 18531, 12, 0, 18531, 12, 0, 18533, 12, 0, 18533, 12, 0, 18534, 12, 0, 18534, 12, 0, 18535, 12, 0, 18535, 12, 0, 19089, 1, 0, 19090, 1, 0, 19091, 1, 0, 19093, 1, 0, 19094, 1, 0, 19095, 1, 0, 19105, 1, 0, 19106, 1, 0, 19107, 1, 0, 19109, 1, 0, 19110, 1, 0, 19111, 1, 0, 19536, 1, 0, 19540, 1, 0, 19552, 1, 0, 19556, 1, 0, 20240, 13, 0, 20240, 13, 0, 20240, 13, 0, 20256, 13, 0, 20256, 13, 0, 20256, 13, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267565131592125_451_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267565131592125_451_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c011e53c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267565131592125_451_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,281 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = 
(result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [960, 4, 0, 1536, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267565184636788_452_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267565184636788_452_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..885f868e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267565184636788_452_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,330 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = 
(result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + 
result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((274 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((291 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((298 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (328 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (354 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 75 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2752, 4, 0, 2768, 4, 0, 4928, 9, 0, 4928, 9, 0, 6528, 1, 0, 6544, 1, 0, 8192, 2, 0, 8208, 2, 0, 10112, 11, 0, 10112, 11, 0, 10112, 11, 0, 10128, 11, 0, 10128, 11, 0, 10128, 11, 0, 11200, 9, 0, 11200, 9, 0, 12800, 1, 0, 12816, 1, 0, 12832, 1, 0, 13696, 8, 0, 13712, 8, 0, 13728, 8, 0, 17536, 4, 0, 17552, 4, 0, 20224, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267565327429079_453_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267565327429079_453_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..41b4c7cb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267565327429079_453_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,174 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1152, 1, 0, 4688, 4, 0, 4704, 4, 0, 5632, 8, 0, 5648, 8, 0, 7296, 12, 0, 7296, 12, 0, 7312, 12, 0, 7312, 12, 0, 8256, 4, 0, 8272, 4, 0, 9280, 8, 0, 9296, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267565553709290_455_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267565553709290_455_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..36e80cc5 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267565553709290_455_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,121 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 13, 0, 1088, 13, 0, 1088, 13, 0, 2304, 13, 0, 2304, 13, 0, 2304, 13, 0, 2320, 13, 0, 2320, 13, 0, 2320, 13, 0, 3968, 9, 0, 3968, 9, 0, 3984, 9, 0, 3984, 9, 0, 4864, 9, 0, 4864, 9, 0, 6656, 9, 0, 6656, 9, 0, 6400, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267565620289181_456_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267565620289181_456_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..44b69e32 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267565620289181_456_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,103 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1536, 2, 0, 1552, 2, 0, 1568, 2, 0, 2880, 2, 0, 2896, 2, 0, 2912, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267565668913498_457_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267565668913498_457_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e492b95e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267565668913498_457_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,398 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + 
for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((218 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((239 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((250 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 
1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((290 << 6) | (i1 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((301 << 6) | (i1 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((316 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((325 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((355 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((369 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((378 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 2)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (390 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (404 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] 
# Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 75 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 6080, 1, 0, 10816, 2, 0, 12800, 4, 0, 15296, 8, 0, 15300, 8, 0, 15304, 8, 0, 15312, 8, 0, 15316, 8, 0, 15320, 8, 0, 15328, 8, 0, 15332, 8, 0, 15336, 8, 0, 16000, 8, 0, 16004, 8, 0, 16008, 8, 0, 16016, 8, 0, 16020, 8, 0, 16024, 8, 0, 16032, 8, 0, 16036, 8, 0, 16040, 8, 0, 22720, 1, 0, 22736, 1, 0, 25280, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267565848612641_458_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267565848612641_458_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8f7e9351 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267565848612641_458_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,275 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + 
break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 81 + 
- Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 1, 0, 1616, 1, 0, 1632, 1, 0, 2752, 1, 0, 2768, 1, 0, 2784, 1, 0, 5248, 1, 0, 5264, 1, 0, 5280, 1, 0, 7040, 2, 0, 7056, 2, 0, 7072, 2, 0, 9536, 4, 0, 10176, 8, 0, 14400, 9, 0, 14400, 9, 0, 14416, 9, 0, 14416, 9, 0, 14432, 9, 0, 14432, 9, 0, 15104, 9, 0, 15104, 9, 0, 15120, 9, 0, 15120, 9, 0, 15136, 9, 0, 15136, 9, 0, 16000, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267565965256392_459_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267565965256392_459_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ba2aaf39 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267565965256392_459_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,117 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 4, 0, 1920, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + 
- Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267566018669761_460_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267566018669761_460_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..27a3d4c6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267566018669761_460_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,129 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 51 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 9, 0, 1088, 9, 0, 2000, 8, 0, 2016, 8, 0, 2032, 8, 0, 2640, 1, 0, 2656, 1, 0, 2672, 1, 0, 3088, 1, 0, 3104, 1, 0, 3120, 1, 0, 3536, 8, 0, 3552, 8, 0, 3568, 8, 0, 4560, 8, 0, 4576, 8, 0, 4592, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267566081611920_461_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267566081611920_461_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b54bf683 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267566081611920_461_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,208 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || 
(WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((91 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() 
== 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 2304, 2, 0, 3392, 2, 0, 10192, 2, 0, 10208, 2, 0, 10896, 2, 0, 10912, 2, 0, 11712, 1, 0, 12416, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267566150557325_462_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267566150557325_462_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a51cda2b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267566150557325_462_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,68 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 2, 0, 912, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267566209247337_463_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267566209247337_463_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..01f782fb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267566209247337_463_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,296 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((80 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((87 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((91 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1984, 12, 0, 1984, 12, 0, 1600, 3, 0, 1600, 3, 0, 3588, 1, 0, 3592, 1, 0, 3604, 1, 0, 3608, 1, 0, 3620, 1, 0, 3624, 1, 0, 4484, 4, 0, 4488, 4, 0, 4500, 4, 0, 4504, 4, 0, 4516, 4, 0, 4520, 4, 0, 6464, 1, 0, 12224, 2, 0, 12544, 4, 0, 12992, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267566422690924_464_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267566422690924_464_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0069d966 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267566422690924_464_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,129 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((33 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result 
+ WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 10, 0, 1152, 10, 0, 1168, 10, 0, 1168, 10, 0, 1184, 10, 0, 1184, 10, 0, 2112, 2, 0, 2116, 2, 0, 2128, 2, 0, 2132, 2, 0, 2144, 2, 0, 2148, 2, 0, 2752, 1, 0, 2756, 1, 0, 2768, 1, 0, 2772, 1, 0, 2784, 1, 0, 2788, 1, 0, 3648, 4, 0, 3652, 4, 0, 3664, 4, 0, 3668, 4, 0, 3680, 4, 0, 3684, 4, 0, 4096, 8, 0, 4100, 8, 0, 4112, 8, 0, 4116, 8, 0, 4128, 8, 0, 4132, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267566592065935_465_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267566592065935_465_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..67ff515b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267566592065935_465_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,215 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((29 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = ((56 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # 
Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 1, 0, 1860, 1, 0, 1872, 1, 0, 1876, 1, 0, 5456, 2, 0, 5472, 2, 0, 6208, 4, 0, 7360, 9, 0, 7360, 9, 0, 10432, 8, 0, 10448, 8, 0, 10464, 8, 0, 11712, 5, 0, 11712, 5, 0, 11456, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267566674196474_466_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267566674196474_466_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..53da6cbb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267566674196474_466_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,68 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml 
+--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267566733536341_467_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267566733536341_467_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c1253735 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267566733536341_467_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,302 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if 
((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 4672, 4, 0, 5248, 4, 0, 5952, 4, 0, 6400, 8, 0, 7040, 1, 0, 7616, 1, 0, 11216, 4, 0, 13776, 8, 0, 13792, 8, 0, 13808, 8, 0, 14224, 8, 0, 14240, 8, 0, 14256, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267566816271055_468_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267566816271055_468_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7ed16ef2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267566816271055_468_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,266 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { 
+ if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((149 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + } + } + case 2: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 12, 0, 2112, 12, 0, 2128, 12, 0, 2128, 12, 0, 2144, 12, 0, 2144, 12, 0, 11152, 4, 0, 11168, 4, 0, 11984, 4, 0, 12000, 4, 0, 16384, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] 
+Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267566934630941_469_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267566934630941_469_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..af581dcb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267566934630941_469_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,156 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if 
((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1664, 2, 0, 1680, 2, 0, 1696, 2, 0, 3584, 2, 0, 3600, 2, 0, 3616, 2, 0, 4480, 2, 0, 6464, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267567000652313_470_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267567000652313_470_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5f9ec5d9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267567000652313_470_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,551 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((235 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((266 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((299 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((313 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((329 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((348 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((358 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (365 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (375 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((398 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((424 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((434 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((443 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((447 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((464 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((473 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((487 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i5 == 1)) { + continue; + } + if ((i5 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (500 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 12608, 2, 0, 12928, 4, 0, 14400, 8, 0, 14416, 8, 0, 18176, 8, 0, 19136, 8, 0, 19152, 8, 0, 22912, 8, 0, 22928, 8, 0, 27776, 1, 0, 27792, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267567086938423_471_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267567086938423_471_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..09737a53 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267567086938423_471_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,223 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 4864, 1, 0, 8064, 8, 0, 9104, 8, 0, 9120, 8, 0, 9136, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267567145941870_472_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267567145941870_472_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5d968724 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267567145941870_472_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,136 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((75 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if 
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 99 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 5, 0, 1600, 5, 0, 3220, 13, 0, 3220, 13, 0, 3220, 13, 0, 3224, 13, 0, 3224, 13, 0, 3224, 13, 0, 3236, 13, 0, 3236, 13, 0, 3236, 13, 0, 3240, 13, 0, 3240, 13, 0, 3240, 13, 0, 4820, 4, 0, 4824, 4, 0, 4836, 4, 0, 4840, 4, 0, 5524, 13, 0, 5524, 13, 0, 5524, 13, 0, 5528, 13, 0, 5528, 13, 0, 5528, 13, 0, 5540, 13, 0, 5540, 13, 0, 5540, 13, 0, 5544, 13, 0, 5544, 13, 0, 5544, 13, 0, 6672, 4, 0, 6688, 4, 0, 7616, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 
1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267567250490431_473_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267567250490431_473_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9045f6dc --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267567250490431_473_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,202 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 2320, 4, 0, 2336, 4, 0, 2352, 4, 0, 4480, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 
+ VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267567305059842_474_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267567305059842_474_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c54364d9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267567305059842_474_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,119 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 6, 0, 1600, 6, 0, 2768, 4, 0, 2784, 4, 0, 5072, 4, 0, 5088, 4, 0, 6016, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + 
VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267567359364172_475_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267567359364172_475_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cf2d62da --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267567359364172_475_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,228 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + 
if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 3, 0, 896, 3, 0, 912, 3, 0, 912, 3, 0, 2880, 2, 0, 2896, 2, 0, 4736, 1, 0, 8640, 1, 0, 8960, 4, 0, 9408, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267567532842667_477_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267567532842667_477_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..86a7d074 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267567532842667_477_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,184 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: 
[576, 9, 0, 576, 9, 0, 1472, 4, 0, 4160, 10, 0, 4160, 10, 0, 3648, 1, 0, 3264, 4, 0, 7616, 1, 0, 7632, 1, 0, 7648, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267567592668473_478_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267567592668473_478_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..630540a6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267567592668473_478_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,245 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((96 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((160 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((175 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + 
Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 10, 0, 832, 10, 0, 3712, 8, 0, 3728, 8, 0, 8320, 9, 0, 8320, 9, 0, 10256, 9, 0, 10256, 9, 0, 10260, 9, 0, 10260, 9, 0, 10264, 9, 0, 10264, 9, 0, 10272, 9, 0, 10272, 9, 0, 10276, 9, 0, 10276, 9, 0, 10280, 9, 0, 10280, 9, 0, 11216, 9, 0, 11216, 9, 0, 11220, 9, 0, 11220, 9, 0, 11224, 9, 0, 11224, 9, 0, 11232, 9, 0, 11232, 9, 0, 11236, 9, 0, 11236, 9, 0, 11240, 9, 0, 11240, 9, 0, 14336, 13, 0, 14336, 13, 0, 14336, 13, 0, 13312, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267567749218653_479_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267567749218653_479_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..781fe7dd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267567749218653_479_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,105 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267567795828560_480_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267567795828560_480_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cf993ee1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267567795828560_480_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,528 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((23 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((33 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); 
+ if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((144 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((191 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((201 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((210 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((215 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((222 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((226 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((235 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 2)) { + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((277 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((296 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((301 << 6) | (counter6 << 4)); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((308 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((312 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (335 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (346 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 3)) { + 
counter7 = (counter7 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((368 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (379 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (400 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (409 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (416 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: 
+ - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 105 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1492, 1, 0, 1496, 1, 0, 1508, 1, 0, 1512, 1, 0, 2580, 1, 0, 2584, 1, 0, 2596, 1, 0, 2600, 1, 0, 3152, 8, 0, 3168, 8, 0, 4800, 2, 0, 4816, 2, 0, 10368, 2, 0, 10384, 2, 0, 12240, 4, 0, 12244, 4, 0, 12248, 4, 0, 12256, 4, 0, 12260, 4, 0, 12264, 4, 0, 12272, 4, 0, 12276, 4, 0, 12280, 4, 0, 13776, 4, 0, 13780, 4, 0, 13784, 4, 0, 13792, 4, 0, 13796, 4, 0, 13800, 4, 0, 13808, 4, 0, 13812, 4, 0, 13816, 4, 0, 16128, 1, 0, 25600, 4, 0, 26624, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267568198823996_481_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267568198823996_481_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c5c225a1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267568198823996_481_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,285 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 
1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= ((137 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || 
(WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 9, 0, 1088, 9, 0, 3856, 4, 0, 3872, 4, 0, 3888, 4, 0, 6096, 9, 0, 6096, 9, 0, 6112, 9, 0, 6112, 9, 0, 6128, 9, 0, 6128, 9, 0, 6736, 1, 0, 6752, 1, 0, 6768, 1, 0, 8080, 8, 0, 8096, 8, 0, 8112, 8, 0, 8784, 8, 0, 8800, 8, 0, 8816, 8, 0, 10192, 6, 0, 10192, 6, 0, 10208, 6, 0, 10208, 6, 0, 10224, 6, 0, 10224, 6, 0, 13712, 9, 0, 13712, 9, 0, 13728, 9, 0, 13728, 9, 0, 13744, 9, 0, 13744, 9, 0, 14672, 9, 0, 14672, 9, 0, 14688, 9, 0, 14688, 9, 0, 14704, 9, 0, 14704, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267568403713260_482_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267568403713260_482_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bcc775c1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267568403713260_482_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,128 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if 
((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((73 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 4, 0, 1616, 4, 0, 3456, 1, 0, 3472, 1, 0, 4672, 9, 0, 4672, 9, 0, 4676, 9, 0, 4676, 9, 0, 4680, 9, 0, 4680, 9, 0, 4688, 9, 0, 4688, 9, 0, 4692, 9, 0, 4692, 9, 0, 4696, 9, 0, 4696, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267568490988106_483_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267568490988106_483_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4679476e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267568490988106_483_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,142 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + 
Stride: 4 + Data: [832, 10, 0, 832, 10, 0, 2900, 8, 0, 2904, 8, 0, 2916, 8, 0, 2920, 8, 0, 3988, 8, 0, 3992, 8, 0, 4004, 8, 0, 4008, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267568562699718_484_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267568562699718_484_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3283cc9a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267568562699718_484_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,231 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((101 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1216, 1, 0, 2756, 2, 0, 2760, 2, 0, 2764, 2, 0, 2772, 2, 0, 2776, 2, 0, 2780, 2, 0, 3920, 2, 0, 3936, 2, 0, 6480, 2, 0, 6496, 2, 0, 6912, 3, 0, 6912, 3, 0, 7360, 1, 0, 9536, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267568655497265_485_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267568655497265_485_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1e1ce9b6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267568655497265_485_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,360 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((265 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((275 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((282 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((289 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((299 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((310 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 57 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3776, 1, 0, 5248, 8, 0, 9664, 2, 0, 13184, 4, 0, 15376, 13, 0, 15376, 13, 0, 15376, 13, 0, 15392, 13, 0, 15392, 13, 0, 15392, 13, 0, 15408, 13, 0, 15408, 13, 0, 15408, 13, 0, 19856, 9, 0, 19856, 9, 0, 19872, 9, 0, 19872, 9, 0, 19888, 9, 0, 19888, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267568815599783_486_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267568815599783_486_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..da5565f5 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267568815599783_486_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,350 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((251 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((290 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (304 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 
threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1664, 1, 0, 7888, 1, 0, 7904, 1, 0, 8464, 1, 0, 8480, 1, 0, 17408, 4, 0, 18576, 4, 0, 18592, 4, 0, 18608, 4, 0, 19456, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267568902173801_487_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267568902173801_487_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d7bcbd93 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267568902173801_487_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,196 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result 
= (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 4, 0, 6976, 4, 0, 6992, 4, 0, 7680, 4, 0, 8960, 4, 0, 8576, 10, 0, 8576, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267568974307329_488_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267568974307329_488_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..13206e41 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267568974307329_488_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = 
(result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1488, 8, 0, 2192, 9, 0, 2192, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267569023655977_489_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267569023655977_489_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..60a8bf83 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267569023655977_489_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,165 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + 
Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 1664, 4, 0, 3328, 9, 0, 3328, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267569074767665_490_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267569074767665_490_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0fa145f9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267569074767665_490_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,457 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((25 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((168 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((233 << 6) | (i3 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((282 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (308 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (329 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (336 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (345 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((367 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((377 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((384 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((399 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (417 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (435 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (446 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(457 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (468 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (478 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (485 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 1, 0, 1604, 1, 0, 1616, 1, 0, 1620, 1, 0, 3216, 8, 0, 3232, 8, 0, 3248, 8, 0, 5584, 8, 0, 5600, 8, 0, 5616, 8, 0, 6288, 8, 0, 6304, 8, 0, 6320, 8, 0, 7616, 9, 0, 7616, 9, 0, 7632, 9, 0, 7632, 9, 0, 8768, 13, 0, 8768, 13, 0, 8768, 13, 0, 8784, 13, 0, 8784, 13, 0, 8784, 13, 0, 12032, 13, 0, 12032, 13, 0, 12032, 13, 0, 12048, 13, 0, 12048, 13, 0, 12048, 13, 0, 13696, 2, 0, 13712, 2, 0, 23504, 6, 0, 23504, 6, 0, 23520, 6, 0, 23520, 6, 0, 25552, 6, 0, 25552, 6, 0, 25568, 6, 0, 25568, 6, 0, 26688, 8, 0, 
27840, 8, 0, 28544, 8, 0, 29248, 8, 0, 29952, 2, 0, 30592, 4, 0, 31040, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267569319241527_491_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267569319241527_491_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e28df722 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267569319241527_491_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,345 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + 
if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 
1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((214 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((229 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((244 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((255 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = 
(counter5 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((304 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((315 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter4 == 2)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2240, 1, 0, 3408, 5, 0, 3408, 5, 0, 3424, 5, 0, 3424, 5, 0, 3440, 5, 0, 3440, 5, 0, 4096, 4, 0, 5376, 8, 0, 7232, 2, 0, 9664, 2, 0, 11072, 13, 0, 11072, 13, 0, 11072, 13, 0, 12544, 5, 0, 12544, 5, 0, 12560, 5, 0, 12560, 5, 0, 14656, 12, 0, 14656, 12, 0, 14660, 12, 0, 14660, 12, 0, 14664, 12, 0, 14664, 12, 0, 14672, 12, 0, 14672, 12, 0, 14676, 12, 0, 14676, 12, 0, 14680, 12, 0, 14680, 12, 0, 15616, 12, 0, 15616, 12, 0, 15620, 12, 0, 15620, 12, 0, 15624, 12, 0, 15624, 12, 0, 15632, 12, 0, 15632, 12, 0, 15636, 12, 0, 15636, 12, 0, 15640, 12, 0, 15640, 12, 0, 16320, 13, 0, 16320, 13, 0, 16320, 13, 0, 16324, 13, 0, 16324, 13, 0, 16324, 13, 0, 16328, 13, 0, 16328, 13, 0, 16328, 13, 0, 16336, 13, 0, 16336, 13, 0, 16336, 13, 0, 16340, 13, 0, 16340, 13, 0, 16340, 13, 0, 16344, 13, 0, 16344, 13, 0, 16344, 13, 0, 17216, 13, 0, 17216, 13, 0, 17216, 13, 0, 17856, 1, 0, 19476, 1, 0, 
19480, 1, 0, 19492, 1, 0, 19496, 1, 0, 20180, 1, 0, 20184, 1, 0, 20196, 1, 0, 20200, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267569618702979_492_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267569618702979_492_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6bf2ac91 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267569618702979_492_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,141 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((100 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 1, 0, 1056, 1, 0, 2384, 5, 0, 2384, 5, 0, 2388, 5, 0, 2388, 5, 0, 2400, 5, 0, 2400, 5, 0, 2404, 5, 0, 2404, 5, 0, 3088, 5, 0, 3088, 5, 0, 3092, 5, 0, 3092, 5, 0, 3104, 5, 0, 3104, 5, 0, 3108, 5, 0, 3108, 5, 0, 3856, 4, 0, 3872, 4, 0, 5056, 8, 0, 5072, 8, 0, 5088, 8, 0, 6400, 8, 0, 6416, 8, 0, 6432, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267569833996435_494_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267569833996435_494_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..afddb1b6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267569833996435_494_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,315 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
+ break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 
= (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((139 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((154 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((201 << 6) | (counter0 << 4)) | (counter2 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((218 << 6) | (counter0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } 
+ case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 147 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 3456, 4, 0, 5824, 9, 0, 5824, 9, 0, 7440, 9, 0, 7440, 9, 0, 7456, 9, 0, 7456, 9, 0, 7472, 9, 0, 7472, 9, 0, 8912, 1, 0, 8916, 1, 0, 8920, 1, 0, 8928, 1, 0, 8932, 1, 0, 8936, 1, 0, 8944, 1, 0, 8948, 1, 0, 8952, 1, 0, 9872, 9, 0, 9872, 9, 0, 9876, 9, 0, 9876, 9, 0, 9880, 9, 0, 9880, 9, 0, 9888, 9, 0, 9888, 9, 0, 9892, 9, 0, 9892, 9, 0, 9896, 9, 0, 9896, 9, 0, 9904, 9, 0, 9904, 9, 0, 9908, 9, 0, 9908, 9, 0, 9912, 9, 0, 9912, 9, 0, 10768, 9, 0, 
10768, 9, 0, 10784, 9, 0, 10784, 9, 0, 10800, 9, 0, 10800, 9, 0, 14928, 9, 0, 14928, 9, 0, 14944, 9, 0, 14944, 9, 0, 14960, 9, 0, 14960, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267569979683899_495_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267569979683899_495_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..71d8d4d4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267569979683899_495_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,270 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + 
Format: UInt32 + Stride: 4 + Data: [1472, 1, 0, 7104, 2, 0, 7120, 2, 0, 8384, 2, 0, 8400, 2, 0, 8896, 4, 0, 9344, 8, 0, 9984, 9, 0, 9984, 9, 0, 10880, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267570051119108_496_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267570051119108_496_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87146592 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267570051119108_496_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,87 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 2)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((32 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2068, 1, 0, 2072, 1, 0, 2076, 1, 0, 2084, 1, 0, 2088, 1, 0, 2092, 1, 0, 2100, 1, 0, 2104, 1, 0, 2108, 1, 0, 2768, 1, 0, 2784, 1, 0, 2800, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267570119248100_497_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267570119248100_497_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b887f40b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267570119248100_497_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,159 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 4, 0, 4928, 6, 
0, 4928, 6, 0, 5376, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267570167684525_498_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267570167684525_498_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..29bd7edf --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267570167684525_498_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,257 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result 
+ WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || 
(WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: 
[576, 1, 0, 6224, 4, 0, 6240, 4, 0, 6256, 4, 0, 6800, 4, 0, 6816, 4, 0, 6832, 4, 0, 8128, 8, 0, 9040, 8, 0, 9056, 8, 0, 9072, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267570226995356_499_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267570226995356_499_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f73ecb35 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267570226995356_499_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,172 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((105 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1408, 9, 0, 1408, 9, 0, 2112, 9, 0, 2112, 9, 0, 8640, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267570282957378_500_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267570282957378_500_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ce8fbec2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267570282957378_500_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,279 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 111 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1744, 8, 0, 1760, 8, 0, 2832, 10, 0, 2832, 10, 0, 2836, 10, 0, 2836, 10, 0, 2840, 10, 0, 2840, 10, 0, 2848, 10, 0, 2848, 10, 0, 2852, 10, 0, 2852, 10, 0, 2856, 10, 0, 2856, 10, 0, 3408, 10, 0, 3408, 10, 0, 3412, 10, 0, 3412, 10, 0, 3416, 10, 0, 3416, 10, 0, 3424, 10, 0, 3424, 10, 0, 3428, 10, 0, 3428, 10, 0, 3432, 10, 0, 3432, 10, 0, 4496, 8, 0, 4512, 8, 0, 5376, 1, 0, 6656, 2, 0, 9664, 4, 0, 13072, 8, 0, 13088, 8, 0, 13776, 8, 0, 13792, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267570437439333_501_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267570437439333_501_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..237e7320 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267570437439333_501_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 6, 0, 1472, 6, 0, 1920, 14, 0, 1920, 14, 0, 1920, 14, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267570490710379_502_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267570490710379_502_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6af6a533 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267570490710379_502_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,181 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 
6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 5056, 4, 0, 6208, 8, 0, 7296, 8, 0, 7312, 8, 0, 7328, 8, 0, 8000, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267570544252533_503_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267570544252533_503_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0eb9fb08 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267570544252533_503_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,265 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((199 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((208 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } 
+ } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((243 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 81 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2240, 10, 0, 2240, 10, 0, 9984, 8, 0, 9728, 5, 0, 9728, 5, 0, 9088, 2, 0, 11648, 8, 0, 11664, 8, 0, 11680, 8, 0, 12736, 1, 0, 12740, 1, 0, 12752, 1, 0, 12756, 1, 0, 12768, 1, 0, 12772, 1, 
0, 13312, 8, 0, 13316, 8, 0, 13328, 8, 0, 13332, 8, 0, 13344, 8, 0, 13348, 8, 0, 15568, 2, 0, 15584, 2, 0, 15600, 2, 0, 15872, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267570683928972_504_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267570683928972_504_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7e66bb18 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267570683928972_504_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,391 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((96 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((129 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } 
+ case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (counter3 
<< 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((305 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((331 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((348 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + 
- Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 9920, 4, 0, 10368, 8, 0, 12224, 1, 0, 15104, 4, 0, 15552, 8, 0, 16192, 9, 0, 16192, 9, 0, 19520, 4, 0, 19536, 4, 0, 21184, 4, 0, 21200, 4, 0, 22272, 4, 0, 22288, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267570764956115_505_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267570764956115_505_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..eca521d6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267570764956115_505_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,328 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if 
((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (((122 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((137 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((155 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 
1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((199 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((214 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + break; + } + case 1: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((243 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((260 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((282 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((289 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (308 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (336 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 2240, 2, 0, 7824, 4, 0, 7828, 4, 0, 7840, 4, 0, 7844, 4, 0, 7856, 4, 0, 7860, 4, 0, 8784, 4, 0, 8788, 4, 0, 8800, 4, 0, 8804, 4, 0, 8816, 4, 0, 8820, 4, 0, 9936, 4, 0, 9940, 4, 0, 9952, 4, 0, 9956, 4, 0, 9968, 4, 0, 9972, 4, 0, 11328, 8, 0, 11344, 8, 0, 12740, 8, 0, 12744, 8, 0, 12756, 8, 0, 12760, 8, 0, 13700, 8, 0, 13704, 8, 0, 13716, 8, 0, 13720, 8, 0, 14400, 8, 0, 14416, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267570892651629_506_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267570892651629_506_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..869edd24 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267570892651629_506_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,262 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((178 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((192 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 13440, 4, 0, 13888, 12, 0, 13888, 12, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267571067219991_508_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267571067219991_508_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b00c2535 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267571067219991_508_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,200 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((77 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1152, 1, 0, 1472, 15, 0, 1472, 15, 0, 1472, 15, 0, 1472, 15, 0, 2368, 1, 0, 6592, 4, 0, 7040, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267571116462554_509_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267571116462554_509_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4cd46d81 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267571116462554_509_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,283 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((i1 == 1)) { + continue; + } + } + if ((i0 == 1)) { + continue; + } + } + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((101 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
if ((counter2 == 1)) { + break; + } + } + } + case 3: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((154 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((165 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + 
Fill: 0 + Size: 141 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 2304, 1, 0, 2308, 1, 0, 2320, 1, 0, 2324, 1, 0, 2944, 1, 0, 2948, 1, 0, 2960, 1, 0, 2964, 1, 0, 5200, 2, 0, 6484, 6, 0, 6484, 6, 0, 6488, 6, 0, 6488, 6, 0, 6492, 6, 0, 6492, 6, 0, 8512, 10, 0, 8512, 10, 0, 8528, 10, 0, 8528, 10, 0, 8544, 10, 0, 8544, 10, 0, 10560, 8, 0, 10564, 8, 0, 10568, 8, 0, 10576, 8, 0, 10580, 8, 0, 10584, 8, 0, 10592, 8, 0, 10596, 8, 0, 10600, 8, 0, 11392, 1, 0, 11408, 1, 0, 11424, 1, 0, 12288, 4, 0, 12304, 4, 0, 12320, 4, 0, 13376, 5, 0, 13376, 5, 0, 13392, 5, 0, 13392, 5, 0, 13408, 5, 0, 13408, 5, 0, 15168, 10, 0, 15168, 10, 0, 14912, 5, 0, 14912, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267571570164424_510_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267571570164424_510_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..64e79f2a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267571570164424_510_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,134 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log: every lane that runs a tracked wave op appends 3 uints (op id | loop counters, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the shared write cursor into _participant_bit, advanced by 3 with InterlockedAdd */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((39 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if 
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (counter0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((85 << 6) | (counter0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1104, 9, 0, 1104, 9, 0, 2512, 1, 0, 2516, 1, 0, 3536, 10, 0, 3536, 10, 0, 4752, 6, 0, 4752, 6, 0, 4756, 6, 0, 4756, 6, 0, 4760, 6, 0, 4760, 6, 0, 5456, 10, 0, 5456, 10, 0, 5460, 10, 
0, 5460, 10, 0, 5464, 10, 0, 5464, 10, 0, 6160, 9, 0, 6160, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267571661943136_511_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267571661943136_511_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1e4c1841 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267571661943136_511_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,324 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log: every lane that runs a tracked wave op appends 3 uints (op id | loop counters, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the shared write cursor into _participant_bit, advanced by 3 with InterlockedAdd */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 1)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((counter4 == 2)) { + break; + } + } + break; + } + case 2: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() >= 3)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((278 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((285 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((296 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((305 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 153 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 9, 0, 1856, 9, 0, 1872, 9, 0, 1872, 9, 0, 1888, 9, 0, 1888, 9, 0, 3072, 9, 0, 3072, 9, 0, 3076, 9, 0, 3076, 9, 0, 3080, 9, 0, 3080, 9, 0, 3088, 9, 0, 3088, 9, 0, 3092, 9, 0, 3092, 9, 0, 3096, 9, 0, 3096, 9, 0, 3104, 9, 0, 3104, 9, 0, 3108, 9, 0, 3108, 9, 0, 3112, 9, 0, 3112, 9, 0, 4864, 9, 0, 4864, 9, 0, 6608, 1, 0, 6624, 1, 0, 7056, 8, 0, 7072, 8, 0, 11136, 9, 0, 11136, 9, 0, 13328, 2, 0, 13344, 2, 0, 13776, 2, 0, 13792, 2, 0, 16848, 4, 0, 16864, 4, 0, 16880, 4, 0, 18256, 4, 0, 18260, 4, 0, 18264, 4, 0, 18272, 4, 0, 18276, 4, 0, 18280, 4, 0, 18288, 4, 0, 18292, 4, 0, 18296, 4, 0, 18960, 4, 0, 18976, 4, 0, 18992, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267572019968135_513_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267572019968135_513_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..784471ed --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267572019968135_513_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,438 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log: every lane that runs a tracked wave op appends 3 uints (op id | loop counters, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the shared write cursor into _participant_bit, advanced by 3 with InterlockedAdd */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ if ((i0 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { 
+ result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((243 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((255 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((counter1 == 2)) { + break; + } + } + break; + } + case 3: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = 
(result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((309 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((319 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((328 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((333 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((340 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((355 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((370 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1472, 1, 0, 2112, 1, 0, 4608, 4, 0, 7552, 1, 0, 7568, 1, 0, 7584, 1, 0, 19780, 8, 0, 19784, 8, 0, 19796, 8, 0, 19800, 8, 0, 21764, 8, 0, 21768, 8, 0, 21780, 8, 0, 21784, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267572131756275_514_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267572131756275_514_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..df113c29 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267572131756275_514_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,233 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + 
WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 7936, 8, 0, 9360, 8, 0, 9376, 8, 0, 10320, 8, 0, 10336, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267572537846183_517_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267572537846183_517_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3e14c4b9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267572537846183_517_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,260 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((WaveGetLaneIndex() == 2)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((119 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 123 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 2, 0, 1728, 1, 0, 1472, 12, 0, 1472, 12, 0, 
3972, 4, 0, 3976, 4, 0, 3988, 4, 0, 3992, 4, 0, 4004, 4, 0, 4008, 4, 0, 4608, 3, 0, 4608, 3, 0, 5776, 9, 0, 5776, 9, 0, 5792, 9, 0, 5792, 9, 0, 6932, 9, 0, 6932, 9, 0, 6936, 9, 0, 6936, 9, 0, 6948, 9, 0, 6948, 9, 0, 6952, 9, 0, 6952, 9, 0, 7636, 9, 0, 7636, 9, 0, 7640, 9, 0, 7640, 9, 0, 7652, 9, 0, 7652, 9, 0, 7656, 9, 0, 7656, 9, 0, 8064, 1, 0, 9216, 13, 0, 9216, 13, 0, 9216, 13, 0, 11328, 8, 0, 12032, 8, 0, 13824, 9, 0, 13824, 9, 0, 15296, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267572711123848_518_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267572711123848_518_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..79103443 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267572711123848_518_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,122 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 4, 0, 2640, 8, 0, 2656, 8, 0, 2672, 8, 0, 3536, 8, 0, 3552, 8, 0, 3568, 8, 0] + - Name: _wave_op_index + Format: UInt32 + 
Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267572962552283_520_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267572962552283_520_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f429e62a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267572962552283_520_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,200 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) 
|| (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 
6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((121 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((130 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((i2 == 1)) { + continue; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1664, 2, 0, 1680, 2, 0, 6080, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267573039583993_521_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267573039583993_521_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..11437652 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267573039583993_521_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((34 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1104, 5, 0, 1104, 5, 0, 1120, 5, 0, 1120, 5, 0, 1136, 5, 0, 1136, 5, 0, 2192, 5, 0, 2192, 5, 0, 2196, 5, 0, 2196, 5, 0, 2208, 5, 0, 2208, 5, 0, 2212, 5, 0, 2212, 5, 0, 2224, 5, 0, 2224, 5, 0, 2228, 5, 0, 2228, 5, 0, 2832, 5, 0, 2832, 5, 0, 2836, 5, 0, 2836, 5, 0, 2848, 5, 0, 2848, 5, 0, 2852, 5, 0, 2852, 5, 0, 2864, 5, 0, 2864, 5, 0, 2868, 5, 0, 2868, 5, 0, 3408, 5, 0, 3408, 5, 0, 3412, 5, 0, 3412, 5, 0, 3424, 5, 0, 3424, 5, 0, 3428, 5, 0, 3428, 5, 0, 3440, 5, 0, 3440, 5, 0, 3444, 5, 0, 3444, 5, 0, 4112, 5, 0, 4112, 5, 0, 4128, 5, 0, 4128, 5, 0, 4144, 5, 0, 4144, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + 
GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267573821599570_525_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267573821599570_525_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..65011711 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267573821599570_525_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,458 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((264 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((277 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((288 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (295 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((328 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((338 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((349 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (358 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (369 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (379 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((396 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((405 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (413 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(417 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 93 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 9, 0, 1088, 9, 0, 3968, 1, 0, 3984, 1, 0, 6400, 1, 0, 7104, 9, 0, 7104, 9, 0, 7936, 1, 0, 15376, 4, 0, 15392, 4, 0, 16912, 4, 0, 16928, 4, 0, 18880, 8, 0, 19776, 5, 0, 19776, 5, 0, 20992, 5, 0, 20992, 5, 0, 21008, 5, 0, 21008, 5, 0, 21632, 1, 0, 21648, 1, 0, 22336, 5, 0, 22336, 5, 0, 22352, 5, 0, 22352, 5, 0, 22912, 5, 0, 22912, 5, 0, 23616, 8, 0, 24256, 8, 0, 25344, 2, 0, 25360, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267573985099965_526_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267573985099965_526_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..659f1e98 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267573985099965_526_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,220 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 
0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + 
Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 8, 0, 6080, 4, 0, 6096, 4, 0, 6112, 4, 0, 6656, 4, 0, 9472, 8, 0, 9920, 12, 0, 9920, 12, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267574183992925_528_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267574183992925_528_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..136102df --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267574183992925_528_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,205 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() 
== 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 
= (i3 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((130 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2752, 2, 0, 2756, 2, 0, 2760, 2, 0, 2768, 2, 0, 2772, 2, 0, 2776, 2, 0, 2784, 2, 0, 2788, 2, 0, 2792, 2, 0, 4544, 1, 0, 6032, 4, 0, 6048, 4, 0, 9216, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267574333955173_529_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267574333955173_529_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..29630f8f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267574333955173_529_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,180 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 1, 0, 1232, 1, 0, 5584, 4, 0, 5600, 4, 0, 6288, 4, 0, 6304, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267574397943162_530_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267574397943162_530_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b4fdb1b3 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267574397943162_530_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,283 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 
3: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: 
{ + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((200 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter4 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 4928, 8, 0, 4944, 8, 0, 8720, 8, 0, 8736, 8, 0, 8752, 8, 0, 9424, 8, 0, 9440, 8, 0, 9456, 8, 0, 10304, 9, 0, 10304, 9, 0, 11920, 4, 0, 11936, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267574505007619_531_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267574505007619_531_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d5539505 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267574505007619_531_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,164 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 2)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 3008, 8, 0, 4096, 8, 0, 4112, 8, 0, 4128, 8, 0, 4672, 8, 0, 4688, 8, 0, 4704, 8, 0, 6080, 5, 0, 6080, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267574563606311_532_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267574563606311_532_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7a41f50c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267574563606311_532_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,204 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6528, 8, 0, 6544, 8, 0, 7616, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267574615515385_533_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267574615515385_533_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0fb2b21a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267574615515385_533_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,380 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (counter0 << 4)) | 
(counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((212 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 1984, 1, 0, 17280, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267574681025436_534_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267574681025436_534_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bc6a5a89 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267574681025436_534_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,244 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 
<< 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 
<< 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((185 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((199 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((210 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + 
result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 2, 0, 2768, 8, 0, 2784, 8, 0, 4560, 8, 0, 4564, 8, 0, 4568, 8, 0, 4576, 8, 0, 4580, 8, 0, 4584, 8, 0, 6864, 6, 0, 6864, 6, 0, 6880, 6, 0, 6880, 6, 0, 7888, 10, 0, 7888, 10, 0, 7904, 10, 0, 7904, 10, 0, 8464, 10, 0, 8464, 10, 0, 8480, 10, 0, 8480, 10, 0, 9296, 6, 0, 9296, 6, 0, 9312, 6, 0, 9312, 6, 0, 11856, 8, 0, 11860, 8, 0, 11864, 8, 0, 11872, 8, 0, 11876, 8, 0, 11880, 8, 0, 13456, 1, 0, 13460, 1, 0, 13464, 1, 0, 13472, 1, 0, 13476, 1, 0, 13480, 
1, 0, 14608, 8, 0, 14624, 8, 0, 15488, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267575007888272_535_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267575007888272_535_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fe3d6d1a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267575007888272_535_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,249 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((87 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((94 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((177 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((191 << 6) | (counter4 << 4)) | (i5 << 2)) | counter6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter6 == 1)) { + break; + } + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 7056, 8, 0, 7072, 8, 0, 
8336, 8, 0, 8352, 8, 0, 9744, 9, 0, 9744, 9, 0, 9760, 9, 0, 9760, 9, 0, 9776, 9, 0, 9776, 9, 0, 10384, 4, 0, 10400, 4, 0, 10416, 4, 0, 13712, 10, 0, 13712, 10, 0, 13728, 10, 0, 13728, 10, 0, 13744, 10, 0, 13744, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267575187579708_536_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267575187579708_536_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f0560bea --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267575187579708_536_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,149 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = 
(result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((104 << 6) | (counter0 << 4)) | 
(counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1104, 9, 0, 1104, 9, 0, 1120, 9, 0, 1120, 9, 0, 1872, 8, 0, 1888, 8, 0, 3988, 8, 0, 3992, 8, 0, 4004, 8, 0, 4008, 8, 0, 5392, 2, 0, 5408, 2, 0, 6676, 1, 0, 6692, 1, 0, 7824, 1, 0, 7840, 1, 0, 8656, 13, 0, 8656, 13, 0, 8656, 13, 0, 8672, 13, 0, 8672, 13, 0, 8672, 13, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + 
VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267575281755817_537_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267575281755817_537_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e3f621a2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267575281755817_537_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,357 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { 
+ if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if 
((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((218 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + 
Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2240, 1, 0, 5120, 8, 0, 6720, 1, 0, 7424, 1, 0, 11520, 4, 0, 15360, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267575346043525_538_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267575346043525_538_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ed88bb0b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267575346043525_538_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,498 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((283 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((292 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((314 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((327 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((336 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((340 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((355 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (360 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((382 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || 
(WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((408 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((418 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((425 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((442 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((465 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((480 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((491 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (500 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((517 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((527 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i8 = 0; (i8 < 2); i8 = (i8 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((546 << 6) | (i7 << 4)) | (i8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i8 == 1)) { + continue; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((558 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 141 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2560, 9, 0, 2560, 9, 0, 2576, 9, 0, 2576, 9, 0, 3712, 9, 0, 3712, 9, 0, 3728, 9, 0, 3728, 9, 0, 4416, 9, 0, 4416, 9, 0, 4432, 9, 0, 4432, 9, 0, 7232, 1, 0, 7248, 1, 0, 7264, 1, 0, 7808, 1, 0, 8768, 8, 0, 9984, 8, 0, 10000, 8, 0, 10016, 8, 0, 20112, 11, 0, 20112, 11, 0, 20112, 11, 0, 20128, 11, 0, 20128, 11, 0, 20128, 11, 0, 20944, 1, 0, 20960, 1, 0, 22736, 2, 0, 22752, 2, 0, 23040, 15, 0, 23040, 15, 0, 23040, 15, 0, 23040, 15, 0, 24448, 9, 0, 24448, 9, 0, 24464, 9, 0, 24464, 9, 0, 29760, 8, 0, 29764, 8, 0, 29768, 8, 0, 29776, 8, 0, 29780, 8, 0, 29784, 8, 0, 30720, 8, 0, 30736, 8, 0, 32000, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 
+ VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267575446425778_539_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267575446425778_539_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..67d9698f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267575446425778_539_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,178 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((130 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 75 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2944, 11, 0, 2944, 11, 0, 2944, 11, 0, 2560, 4, 0, 3584, 9, 0, 3584, 9, 0, 6144, 4, 0, 7184, 10, 0, 7184, 10, 0, 7200, 10, 0, 7200, 10, 0, 7216, 10, 0, 7216, 10, 0, 8340, 6, 0, 8340, 6, 0, 8344, 6, 0, 8344, 6, 0, 8356, 6, 0, 8356, 6, 0, 8360, 6, 0, 8360, 6, 0, 8372, 6, 0, 8372, 6, 0, 8376, 6, 0, 8376, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267575684889029_540_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267575684889029_540_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..917372fa --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267575684889029_540_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,239 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((105 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((116 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((217 << 6) | (counter5 << 4)) 
| (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter6 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 123 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2240, 9, 0, 2240, 9, 0, 2244, 9, 0, 2244, 9, 0, 2248, 9, 0, 2248, 9, 0, 2256, 9, 0, 2256, 9, 0, 2260, 9, 0, 2260, 9, 0, 2264, 9, 0, 2264, 9, 0, 3136, 9, 0, 3136, 9, 0, 3152, 9, 0, 3152, 9, 0, 4032, 9, 0, 4032, 9, 0, 6724, 4, 0, 6728, 4, 0, 6740, 4, 0, 6744, 4, 0, 7428, 4, 0, 7432, 4, 0, 7444, 4, 0, 7448, 4, 0, 8128, 4, 0, 8144, 4, 0, 9984, 1, 0, 10000, 1, 0, 10016, 1, 0, 11712, 1, 0, 11728, 1, 0, 11744, 1, 0, 13908, 2, 0, 13912, 2, 0, 13924, 2, 0, 13928, 2, 0, 13940, 2, 0, 13944, 2, 0, 14848, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 
0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267575957674029_541_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267575957674029_541_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7aa472c0 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267575957674029_541_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,179 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 3)) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 8, 0, 1312, 8, 0, 2000, 8, 0, 2016, 8, 0, 4288, 1, 0, 5696, 1, 0, 6336, 9, 0, 6336, 9, 0, 7232, 4, 0, 9024, 9, 0, 9024, 9, 0, 8768, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 
1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267576034328140_542_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267576034328140_542_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..09652d5d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267576034328140_542_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,201 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint 
counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 
6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((126 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1152, 5, 0, 1152, 5, 0, 5952, 6, 0, 5952, 6, 0, 8512, 8, 0, 8528, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267576098494437_543_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267576098494437_543_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e538f232 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267576098494437_543_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,253 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 
4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 1)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3520, 5, 0, 3520, 5, 0, 3136, 8, 0, 2368, 2, 0, 6080, 4, 0, 7120, 5, 0, 7120, 5, 0, 7136, 5, 0, 7136, 5, 0, 9024, 5, 0, 9024, 5, 0, 10176, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267576271016709_545_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267576271016709_545_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3377d286 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267576271016709_545_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,145 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 1, 0, 1312, 1, 0, 5200, 5, 0, 5200, 5, 0, 5216, 5, 0, 5216, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267576336248319_546_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267576336248319_546_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..953cee20 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267576336248319_546_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,138 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((23 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4992, 4, 0, 5008, 4, 0, 5024, 4, 0, 5440, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] 
+Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267576402342290_547_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267576402342290_547_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e668d26e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267576402342290_547_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,99 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 10, 0, 1024, 10, 0, 1040, 10, 0, 1040, 10, 0, 1056, 10, 0, 1056, 10, 0, 2752, 4, 0, 2768, 4, 0, 2784, 4, 0, 3328, 4, 0, 3344, 4, 0, 3360, 4, 0, 4416, 4, 0, 4432, 4, 0, 4448, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267576466856048_548_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267576466856048_548_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0cd3a1b5 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267576466856048_548_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,160 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((66 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if 
((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1808, 9, 0, 1808, 9, 0, 3600, 8, 0, 4240, 4, 0, 5904, 9, 0, 5904, 9, 0, 8320, 13, 0, 8320, 13, 0, 8320, 13, 0, 7808, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267576530630935_549_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267576530630935_549_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a3cea698 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267576530630935_549_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,294 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Fuzzer-generated lane-divergence test. After every wave intrinsic each active lane appends a 3-uint record to _participant_bit: word 0 is the wave-op ID shifted left by 6, OR-ed with the enclosing loop counters (shifted by 4 and by 2) where the op sits inside loops; words 1 and 2 are ballot.x and ballot.y of WaveActiveBallot(1). _wave_op_index[0] is the shared write cursor, advanced by 3 per record through InterlockedAdd. result is never written to a buffer. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((33 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); +
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((95 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((102 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 0)) { + result =
(result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if ((WaveGetLaneIndex() == 0)) { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result +
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((220 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((230 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((239 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((243 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); +
_participant_bit[temp] = ((252 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + if ((i5 == 1)) { + break; /* never reached: the identical condition just above already continues */ + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 9, 0, 2112, 9, 0, 2116, 9, 0, 2116, 9, 0, 2128, 9, 0, 2128, 9, 0, 2132, 9, 0, 2132, 9, 0, 4032, 8, 0, 4048, 8, 0, 6848, 4, 0, 8128, 5, 0, 8128, 5, 0, 11584, 13, 0, 11584, 13, 0, 11584, 13, 0, 12544, 2, 0, 14084, 2, 0, 14088, 2, 0, 14092, 2, 0, 14100, 2, 0, 14104, 2, 0, 14108, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267576769770885_550_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267576769770885_550_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6485a539 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267576769770885_550_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,205 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Fuzzer-generated lane-divergence test. After every wave intrinsic each active lane appends a 3-uint record to _participant_bit: word 0 is the wave-op ID shifted left by 6, OR-ed with the enclosing loop counters (shifted by 4 and by 2) where the op sits inside loops; words 1 and 2 are ballot.x and ballot.y of WaveActiveBallot(1). _wave_op_index[0] is the shared write cursor, advanced by 3 per record through InterlockedAdd. result is never written to a buffer. */ + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() <
1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result
+ WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 10, 0, 832, 10, 0, 1920, 10, 0, 1920, 10, 0, 1936, 10, 0, 1936, 10, 0, 1952, 10, 0, 1952, 10, 0, 4032, 1, 0, 7232, 4, 0, 8448, 2, 0, 8464, 2, 0, 8480, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern
+ GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267576971965703_552_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267576971965703_552_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..36f01e46 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267576971965703_552_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,299 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Fuzzer-generated lane-divergence test. After every wave intrinsic each active lane appends a 3-uint record to _participant_bit: word 0 is the wave-op ID shifted left by 6, OR-ed with the enclosing loop counters (shifted by 4 and by 2) where the op sits inside loops; words 1 and 2 are ballot.x and ballot.y of WaveActiveBallot(1). _wave_op_index[0] is the shared write cursor, advanced by 3 per record through InterlockedAdd. result is never written to a buffer. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { /* cases 0, 1 and 2 below end without break and fall through into the next case; only case 3 breaks */ + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) ||
(WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((76 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((89 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((96 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result =
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((151 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((160 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); +
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; +
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 3728, 1, 0, 3732, 1, 0, 3736, 1, 0, 3744, 1, 0, 3748, 1, 0, 3752, 1, 0, 4880, 1, 0, 4884, 1, 0, 4888, 1, 0, 4896, 1, 0, 4900, 1, 0, 4904, 1, 0, 5712, 1, 0, 5716, 1, 0, 5720, 1, 0, 5728, 1, 0, 5732, 1, 0, 5736, 1, 0, 6160, 1, 0, 6164, 1, 0, 6168, 1, 0, 6176, 1, 0, 6180, 1, 0, 6184, 1, 0, 6720, 1, 0, 7808, 2, 0, 7824, 2, 0, 10240, 1, 0, 10244, 1, 0, 10256, 1, 0, 10260, 1, 0, 11392, 2, 0, 11408, 2, 0, 14464, 4, 0, 15936, 5, 0, 15936, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit +
Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267577301637306_554_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267577301637306_554_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0b5c4c12 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267577301637306_554_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,177 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Fuzzer-generated lane-divergence test. After every wave intrinsic each active lane appends a 3-uint record to _participant_bit: word 0 is the wave-op ID shifted left by 6, OR-ed with the enclosing loop counters (shifted by 4 and by 2) where the op sits inside loops; words 1 and 2 are ballot.x and ballot.y of WaveActiveBallot(1). _wave_op_index[0] is the shared write cursor, advanced by 3 per record through InterlockedAdd. result is never written to a buffer. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; +
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((83 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((98 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); +
_participant_bit[temp] = ((112 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 7808, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267577363870163_555_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267577363870163_555_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..11ce1022 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267577363870163_555_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,99 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Fuzzer-generated lane-divergence test. After every wave intrinsic each active lane appends a 3-uint record to _participant_bit: word 0 is the wave-op ID shifted left by 6, OR-ed with the enclosing loop counters (shifted by 4 and by 2) where the op sits inside loops; words 1 and 2 are ballot.x and ballot.y of WaveActiveBallot(1). _wave_op_index[0] is the shared write cursor, advanced by 3 per record through InterlockedAdd. result is never written to a buffer. */ + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x;
+ _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 13, 0, 1152, 13, 0, 1152, 13, 0, 1168, 13, 0, 1168, 13, 0, 1168, 13, 0, 1184, 13, 0, 1184, 13, 0, 1184, 13, 0, 2564, 14, 0, 2564, 14, 0, 2564, 14, 0, 2568, 14, 0, 2568, 14, 0, 2568, 14, 0, 2580, 14, 0, 2580, 14, 0, 2580, 14, 0, 2584, 14, 0, 2584, 14, 0, 2584, 14, 0, 2596, 14, 0, 2596, 14, 0, 2596, 14, 0, 2600, 14, 0, 2600, 14, 0, 2600, 14, 0, 3524, 7, 0, 3524, 7, 0, 3524, 7, 0, 3528, 7, 0, 3528, 7, 0, 3528, 7, 0, 3540, 7, 0, 3540, 7, 0, 3540, 7, 0, 3544, 7, 0, 3544, 7, 0, 3544, 7, 0, 3556, 7, 0, 3556, 7, 0, 3556, 7, 0, 3560, 7, 0, 3560, 7, 0, 3560, 7, 0, 4224, 13, 0, 4224, 13, 0, 4224, 13, 0, 4240, 13, 0, 4240, 13, 0, 4240, 13, 0, 4256, 13, 0, 4256, 13, 0, 4256, 13, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267577464026290_556_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267577464026290_556_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ab6de955 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267577464026290_556_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,190 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((13 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { 
+ if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((106 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [848, 12, 0, 848, 12, 0, 864, 12, 0, 864, 12, 0, 880, 12, 0, 880, 12, 0, 2000, 13, 0, 2000, 13, 0, 2000, 13, 0, 2016, 13, 0, 2016, 13, 0, 2016, 13, 0, 2032, 13, 0, 2032, 13, 0, 2032, 13, 0, 2576, 8, 0, 2592, 8, 0, 2608, 8, 0, 4624, 8, 0, 4640, 8, 0, 5904, 8, 0, 5920, 8, 0, 7696, 8, 0, 7712, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267577648693686_558_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267577648693686_558_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0eb7dc0b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267577648693686_558_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,234 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((33 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((76 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 0)) { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = 
(result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { 
+ result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 1, 0, 1104, 1, 0, 1120, 1, 0, 2692, 1, 0, 2708, 1, 0, 2724, 1, 0, 3968, 2, 0, 3984, 2, 0, 5632, 2, 0, 5648, 2, 0, 8320, 8, 0, 8960, 9, 0, 8960, 9, 0, 9856, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267577733872418_559_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267577733872418_559_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9b607417 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267577733872418_559_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,225 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 
2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3072, 8, 0, 6464, 5, 0, 6464, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267577782357479_560_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267577782357479_560_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b5ad8483 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267577782357479_560_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,195 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 1, 0, 1808, 1, 0, 1824, 1, 0, 2944, 1, 0, 2960, 1, 0, 2976, 1, 0, 9664, 13, 0, 9664, 13, 0, 9664, 13, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267577849583125_561_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267577849583125_561_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8e3ea86c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267577849583125_561_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,384 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() 
== 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((163 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if ((counter1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if 
((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((285 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((296 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter3 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (337 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((353 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (360 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 2)) { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((396 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((407 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((422 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 63 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 2240, 2, 0, 3712, 2, 0, 3728, 2, 0, 3744, 2, 0, 5504, 2, 0, 5520, 2, 0, 5536, 2, 0, 6464, 2, 0, 6480, 2, 0, 6496, 2, 0, 7424, 2, 0, 12544, 4, 0, 14272, 1, 0, 14848, 1, 0, 21568, 4, 0, 22608, 4, 0, 22624, 4, 0, 22640, 4, 0, 23040, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267577941450579_562_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267577941450579_562_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ffaeed5a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267577941450579_562_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,152 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: 3 uints per record, written by main */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free index into _participant_bit */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Every lane that executes a wave op reserves a 3-uint record by atomically adding 3 to _wave_op_index[0], then stores {tag, ballot.x, ballot.y} with ballot = WaveActiveBallot(1). The tag is a per-call-site constant shifted left by 6, OR-ed with (i0 << 4) and (i1 << 2) inside the loops. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); +
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((94 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4
+ Fill: 0 + Size: 63 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 4, 0, 1920, 8, 0, 3392, 7, 0, 3392, 7, 0, 3392, 7, 0, 3408, 7, 0, 3408, 7, 0, 3408, 7, 0, 3424, 7, 0, 3424, 7, 0, 3424, 7, 0, 4544, 10, 0, 4544, 10, 0, 4560, 10, 0, 4560, 10, 0, 4576, 10, 0, 4576, 10, 0, 6720, 8, 0, 6736, 8, 0, 6752, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267578037123211_563_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267578037123211_563_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b8ffecd8 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267578037123211_563_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,132 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: 3 uints per record, written by main */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free index into _participant_bit */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Every lane that executes a wave op reserves a 3-uint record by atomically adding 3 to _wave_op_index[0], then stores {tag, ballot.x, ballot.y} with ballot = WaveActiveBallot(1). The tag is a per-call-site constant shifted left by 6. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1,
1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 3008, 1, 0, 4096, 1, 0, 4992, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267578089010526_564_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267578089010526_564_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4a98f0ca --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267578089010526_564_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,281 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: 3 uints per record, written by main */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free index into _participant_bit */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Every lane that executes a wave op reserves a 3-uint record by atomically adding 3 to _wave_op_index[0], then stores {tag, ballot.x, ballot.y} with ballot = WaveActiveBallot(1). The tag is a per-call-site constant shifted left by 6, OR-ed with (loop index << 4) inside the for loops. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; +
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; +
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() ==
2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); +
_participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 2240, 4, 0, 2256, 4, 0, 2272, 4, 0, 13696, 6, 0, 13696, 6, 0, 13440, 9, 0, 13440, 9, 0, 14336, 5, 0, 14336, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267578154809566_565_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267578154809566_565_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c6e0dbf2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267578154809566_565_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,125 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: 3 uints per record, written by main */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free index into _participant_bit */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Every lane that executes a wave op reserves a 3-uint record by atomically adding 3 to _wave_op_index[0], then stores {tag, ballot.x, ballot.y} with ballot = WaveActiveBallot(1). The tag is a per-call-site constant shifted left by 6, OR-ed with (i0 << 4) and (counter1 << 2) inside the loops. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); +
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 3204, 2, 0, 3208, 2, 0, 3220, 2, 0, 3224, 2, 0, 3520, 4, 0, 3968, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267578219773900_566_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267578219773900_566_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3fcc9bae --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267578219773900_566_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,242 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: 3 uints per record, written by main */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free index into _participant_bit */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Every lane that executes a wave op reserves a 3-uint record by atomically adding 3 to _wave_op_index[0], then stores {tag, ballot.x, ballot.y} with ballot = WaveActiveBallot(1). The tag is a per-call-site constant shifted left by 6, OR-ed with (enclosing loop counter << 4) and, in the nested for loop, (i2 << 2). */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { +
result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((77 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] =
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] =
((220 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((235 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4928, 1, 0, 4932, 1, 0, 4936, 1, 0, 4944, 1, 0, 4948, 1, 0, 4952, 1, 0, 4960, 1, 0, 4964, 1, 0, 4968, 1, 0, 6144, 8, 0, 7296, 2, 0, 8512, 2, 0, 8528, 2, 0, 9344, 2, 0, 9360, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267578349760299_567_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267578349760299_567_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1bc11f4b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267578349760299_567_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,172 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: every lane that runs a wave op appends 3 uints (tag word, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the shared append cursor, advanced by 3 through InterlockedAdd */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2368, 2, 0, 3904, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267578402401757_568_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267578402401757_568_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..be7d9f9b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267578402401757_568_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,131 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: every lane that runs a wave op appends 3 uints (tag word, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the shared append cursor, advanced by 3 through InterlockedAdd */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 
<< 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((73 << 6) | (i0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((82 << 6) | (i0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 13, 0, 1152, 13, 0, 1152, 13, 0, 1168, 13, 0, 1168, 13, 0, 1168, 13, 0, 2560, 8, 0, 2564, 8, 0, 2576, 8, 0, 2580, 8, 0, 3520, 1, 0, 3536, 1, 0, 4676, 5, 0, 4676, 5, 0, 4680, 5, 0, 4680, 5, 0, 4692, 5, 0, 4692, 5, 0, 4696, 5, 0, 4696, 5, 0, 5252, 2, 0, 5256, 2, 0, 5268, 2, 0, 5272, 2, 0, 6208, 5, 0, 6208, 5, 0, 6224, 5, 0, 6224, 5, 0, 6912, 9, 0, 6912, 9, 0, 6928, 9, 0, 6928, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267578495659539_569_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267578495659539_569_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..eaab4955 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267578495659539_569_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,181 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: every lane that runs a wave op appends 3 uints (tag word, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the shared append cursor, advanced by 3 through InterlockedAdd */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3712, 8, 0, 4416, 8, 0, 4864, 8, 0, 6016, 2, 0, 7872, 2, 0, 8512, 9, 0, 8512, 9, 0, 9408, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267578559845518_570_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267578559845518_570_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..03ab13c0 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267578559845518_570_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: every lane that runs a wave op appends 3 uints (tag word, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the shared append cursor, advanced by 3 through InterlockedAdd */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 9, 0, 1216, 9, 0, 832, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - 
Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267578603202135_571_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267578603202135_571_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..221535e7 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267578603202135_571_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,339 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: every lane that runs a wave op appends 3 uints (tag word, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the shared append cursor, advanced by 3 through InterlockedAdd */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((13 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((30 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (((40 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((111 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + if ((i3 
== 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((165 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((175 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((184 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((188 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((195 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((255 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((264 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((269 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((291 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 117 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [848, 1, 0, 864, 1, 0, 1936, 1, 0, 1940, 1, 0, 1944, 1, 0, 1952, 1, 0, 1956, 1, 0, 1960, 1, 0, 2576, 5, 0, 2576, 5, 0, 2580, 5, 0, 2580, 5, 0, 2584, 5, 0, 2584, 5, 0, 2592, 5, 0, 2592, 5, 0, 2596, 5, 0, 2596, 5, 0, 2600, 5, 0, 2600, 5, 0, 3600, 4, 0, 3616, 4, 0, 5440, 1, 0, 7120, 1, 0, 7124, 1, 0, 7128, 1, 0, 7136, 1, 0, 7140, 1, 0, 7144, 1, 0, 8208, 1, 0, 8224, 1, 0, 9152, 1, 0, 14016, 4, 0, 14032, 4, 0, 15680, 4, 0, 15696, 4, 0, 17216, 4, 0, 17232, 4, 0, 19072, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267578949060487_572_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267578949060487_572_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b1fe1670 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267578949060487_572_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,372 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: every lane that runs a wave op appends 3 uints (tag word, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the shared append cursor, advanced by 3 through InterlockedAdd */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 
0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else 
{ + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((224 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((238 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((249 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((287 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((297 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(6)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((304 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((311 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter4 == 2)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (321 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 111 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1536, 4, 0, 1552, 4, 0, 3200, 8, 0, 3216, 8, 0, 4100, 8, 0, 4104, 8, 0, 4116, 8, 0, 4120, 8, 0, 4548, 8, 0, 4552, 8, 0, 4564, 8, 0, 4568, 8, 0, 11904, 8, 0, 11520, 6, 0, 11520, 6, 0, 11136, 1, 0, 14352, 1, 0, 14356, 1, 0, 14360, 1, 0, 14368, 1, 0, 14372, 1, 0, 14376, 1, 0, 14384, 1, 0, 14388, 1, 0, 14392, 1, 0, 15952, 1, 0, 15956, 1, 0, 15960, 1, 0, 15968, 1, 0, 15972, 1, 0, 15976, 1, 0, 15984, 1, 0, 15988, 1, 0, 15992, 1, 0, 20544, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - 
Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267579389155395_573_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267579389155395_573_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5054c369 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267579389155395_573_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,119 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 
0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 2, 0, 2752, 8, 0, 5760, 10, 0, 5760, 10, 0, 6464, 10, 0, 6464, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267579450797017_574_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267579450797017_574_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..629c554e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267579450797017_574_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267579513189538_575_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267579513189538_575_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b0c8581a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267579513189538_575_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,236 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + 
WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((152 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((170 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((183 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((190 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((201 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1792, 1, 0, 4048, 2, 0, 4052, 2, 0, 4064, 2, 0, 4068, 2, 0, 4080, 2, 0, 4084, 2, 0, 7616, 8, 0, 8576, 8, 0, 8592, 8, 0, 9732, 10, 0, 9732, 10, 0, 9736, 10, 0, 9736, 10, 0, 9740, 10, 0, 9740, 10, 0, 9748, 10, 0, 9748, 10, 0, 9752, 10, 0, 9752, 10, 0, 9756, 10, 0, 9756, 10, 0, 10884, 9, 0, 10884, 9, 0, 10888, 9, 0, 10888, 9, 0, 10892, 9, 0, 10892, 9, 0, 10900, 9, 0, 10900, 9, 0, 10904, 9, 0, 10904, 9, 0, 10908, 9, 0, 10908, 9, 0, 11716, 13, 0, 11716, 13, 0, 11716, 13, 0, 11720, 13, 0, 11720, 13, 0, 11720, 13, 0, 11724, 13, 0, 11724, 13, 0, 11724, 13, 0, 11732, 13, 0, 11732, 13, 0, 11732, 13, 0, 11736, 13, 0, 11736, 13, 0, 11736, 13, 0, 11740, 13, 0, 11740, 13, 0, 11740, 13, 0, 12868, 10, 0, 12868, 10, 0, 12872, 10, 0, 12872, 10, 0, 12876, 10, 0, 12876, 10, 0, 12884, 10, 0, 12884, 10, 0, 12888, 10, 0, 12888, 10, 0, 12892, 10, 0, 12892, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + 
Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267579884683943_576_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267579884683943_576_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f7f52c30 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267579884683943_576_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,483 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((120 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((131 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((173 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() 
== 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((199 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((214 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((227 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((238 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() 
% 3)) { + case 0: { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((274 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((292 << 6) | (i6 << 4)) | (counter7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter7 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((306 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((331 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter8 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (339 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + for (uint i9 = 0; (i9 < 3); i9 = (i9 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((362 << 6) | (i9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((371 << 6) | (i9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (378 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (398 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (408 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (417 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (422 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (429 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (436 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (454 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (472 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (476 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (491 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 279 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2432, 1, 0, 2448, 1, 0, 2464, 1, 0, 5056, 4, 0, 7696, 8, 0, 7700, 8, 0, 7704, 8, 0, 7712, 8, 0, 7716, 8, 0, 7720, 8, 0, 7728, 8, 0, 7732, 8, 0, 7736, 8, 0, 8400, 8, 0, 8404, 8, 0, 8408, 8, 0, 8416, 8, 0, 8420, 8, 0, 8424, 8, 0, 8432, 8, 0, 8436, 8, 0, 8440, 8, 0, 9360, 8, 0, 9376, 8, 0, 9392, 8, 0, 11072, 9, 0, 11072, 9, 0, 11076, 9, 0, 11076, 9, 0, 11080, 9, 0, 11080, 9, 0, 11088, 9, 0, 11088, 9, 0, 11092, 9, 0, 11092, 9, 0, 11096, 9, 0, 11096, 9, 0, 12736, 10, 0, 12736, 10, 0, 12740, 10, 0, 12740, 10, 0, 12744, 10, 0, 12744, 10, 0, 12752, 10, 0, 12752, 10, 0, 12756, 10, 0, 12756, 10, 0, 12760, 10, 0, 12760, 10, 0, 13696, 12, 0, 13696, 12, 0, 13700, 12, 0, 13700, 12, 0, 13704, 12, 0, 13704, 12, 0, 13712, 12, 0, 13712, 12, 0, 13716, 12, 0, 
13716, 12, 0, 13720, 12, 0, 13720, 12, 0, 14528, 1, 0, 14532, 1, 0, 14536, 1, 0, 14544, 1, 0, 14548, 1, 0, 14552, 1, 0, 15232, 9, 0, 15232, 9, 0, 15236, 9, 0, 15236, 9, 0, 15240, 9, 0, 15240, 9, 0, 15248, 9, 0, 15248, 9, 0, 15252, 9, 0, 15252, 9, 0, 15256, 9, 0, 15256, 9, 0, 17536, 8, 0, 17552, 8, 0, 17568, 8, 0, 18692, 8, 0, 18696, 8, 0, 18708, 8, 0, 18712, 8, 0, 18724, 8, 0, 18728, 8, 0, 19584, 8, 0, 19600, 8, 0, 19616, 8, 0, 30208, 4, 0, 31424, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267580386887794_577_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267580386887794_577_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d0a9ca57 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267580386887794_577_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,246 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((67 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((76 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + break; + } + case 3: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((187 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((204 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 2)) { + break; + } + } + if ((i4 == 1)) { + continue; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 57 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1664, 8, 0, 1680, 8, 0, 3392, 8, 0, 3396, 8, 0, 3400, 8, 0, 3408, 8, 0, 3412, 8, 0, 3416, 8, 0, 5952, 1, 0, 11972, 8, 0, 11976, 8, 0, 11988, 8, 0, 11992, 
8, 0, 13060, 8, 0, 13064, 8, 0, 13076, 8, 0, 13080, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267580654394296_578_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267580654394296_578_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5389aafd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267580654394296_578_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,247 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((32 << 6) | (i0 << 
4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((83 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((90 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 1, 0, 1104, 1, 0, 1120, 1, 0, 7168, 1, 0, 8064, 4, 0, 9152, 8, 0, 11456, 10, 0, 11456, 10, 0, 11072, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267580866462040_579_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267580866462040_579_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e339ebc3 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267580866462040_579_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + 
WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 4, 0, 1920, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267580916191863_580_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267580916191863_580_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f45af79b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267580916191863_580_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,341 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = 
(result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 
3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 9, 0, 1088, 9, 0, 4496, 8, 0, 4512, 8, 0, 5200, 8, 0, 5216, 8, 0, 5760, 8, 0, 17728, 4, 0, 18176, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + 
Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267581014797412_581_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267581014797412_581_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..958c5d30 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267581014797412_581_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,142 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + 
counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 5, 0, 1600, 5, 0, 2688, 1, 0, 2704, 1, 0, 4100, 5, 0, 4100, 5, 0, 4104, 5, 0, 4104, 5, 0, 4108, 5, 0, 4108, 5, 0, 4116, 5, 0, 4116, 5, 0, 4120, 5, 0, 4120, 5, 0, 4124, 5, 0, 4124, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267581102972693_582_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267581102972693_582_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2b74fd8d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267581102972693_582_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2368, 5, 0, 2368, 5, 0, 2112, 8, 0, 1472, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267581161320218_583_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267581161320218_583_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..74593b82 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267581161320218_583_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,165 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 9, 0, 1600, 9, 0, 1616, 9, 0, 1616, 9, 0, 1632, 9, 0, 1632, 9, 0, 2432, 9, 0, 2432, 9, 0, 2448, 9, 0, 2448, 9, 0, 2464, 9, 
0, 2464, 9, 0, 3008, 2, 0, 3024, 2, 0, 3040, 2, 0, 4672, 4, 0, 4688, 4, 0, 4704, 4, 0, 5632, 6, 0, 5632, 6, 0, 5648, 6, 0, 5648, 6, 0, 5664, 6, 0, 5664, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267581251913299_584_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267581251913299_584_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8a394247 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267581251913299_584_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,172 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 
0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((88 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2048, 5, 0, 2048, 5, 0, 3264, 9, 0, 3264, 9, 0, 4688, 2, 0, 4704, 2, 0, 5648, 2, 0, 5652, 2, 0, 5656, 2, 0, 5664, 2, 0, 5668, 2, 0, 5672, 2, 0, 5952, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267581313162903_585_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267581313162903_585_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c6d8a2f6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267581313162903_585_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,214 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((39 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute 
+ Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2500, 1, 0, 2504, 1, 0, 2508, 1, 0, 2516, 1, 0, 2520, 1, 0, 2524, 1, 0, 2532, 1, 0, 2536, 1, 0, 2540, 1, 0, 3460, 1, 0, 3464, 1, 0, 3468, 1, 0, 3476, 1, 0, 3480, 1, 0, 3484, 1, 0, 3492, 1, 0, 3496, 1, 0, 3500, 1, 0, 3904, 1, 0, 3920, 1, 0, 3936, 1, 0, 4800, 4, 0, 8128, 1, 0, 9216, 8, 0, 10368, 8, 0, 10944, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267581429845699_586_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267581429845699_586_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3de1e3fd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267581429845699_586_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + 
- Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267581488791193_587_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267581488791193_587_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c01d9272 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267581488791193_587_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,218 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((126 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + 
Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3392, 9, 0, 3392, 9, 0, 4288, 4, 0, 4928, 1, 0, 5824, 4, 0, 6992, 8, 0, 7008, 8, 0, 7024, 8, 0, 8784, 8, 0, 8800, 8, 0, 8816, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267581662945645_589_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267581662945645_589_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4905378c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267581662945645_589_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,116 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + uint counter1 = 0; + while 
((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 5, 0, 976, 5, 0, 992, 5, 0, 992, 5, 0, 2324, 1, 0, 2328, 1, 0, 2340, 1, 0, 2344, 1, 0, 4304, 2, 0, 4320, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + 
Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267581737039164_590_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267581737039164_590_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..699b7a2c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267581737039164_590_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,623 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if 
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) 
{ + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((104 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((209 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((222 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + 
result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((231 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((267 << 6) | (i3 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((281 << 6) | (i3 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i5 == 1)) { + continue; + } + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((299 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch 
((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (337 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((351 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((369 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((380 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((391 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((398 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (423 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((438 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (449 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (460 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (470 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (488 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (499 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (506 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (527 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (541 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (552 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((568 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((577 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (586 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter9 = 0; + while ((counter9 < 2)) { + counter9 = (counter9 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((602 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((620 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((629 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (633 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 129 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 8144, 4, 0, 8160, 4, 0, 8176, 4, 0, 8768, 
8, 0, 11136, 6, 0, 11136, 6, 0, 11152, 6, 0, 11152, 6, 0, 14788, 1, 0, 14792, 1, 0, 14796, 1, 0, 14804, 1, 0, 14808, 1, 0, 14812, 1, 0, 15488, 6, 0, 15488, 6, 0, 15504, 6, 0, 15504, 6, 0, 17088, 10, 0, 17088, 10, 0, 17092, 10, 0, 17092, 10, 0, 17096, 10, 0, 17096, 10, 0, 17104, 10, 0, 17104, 10, 0, 17108, 10, 0, 17108, 10, 0, 17112, 10, 0, 17112, 10, 0, 19136, 14, 0, 19136, 14, 0, 19136, 14, 0, 19152, 14, 0, 19152, 14, 0, 19152, 14, 0, 19776, 1, 0, 28032, 4, 0, 28048, 4, 0, 28064, 4, 0, 28736, 4, 0, 34624, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267582157998036_591_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267582157998036_591_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f5e5d1b3 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267582157998036_591_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,505 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter0 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((160 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((178 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((189 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((200 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() 
% 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((260 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((274 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((323 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((332 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (339 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (349 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (358 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (368 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (375 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (393 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (400 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (412 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result 
= (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (422 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (434 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (441 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (445 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 81 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2768, 9, 0, 2768, 9, 0, 2784, 9, 0, 2784, 9, 0, 3408, 8, 0, 3424, 8, 0, 5392, 8, 0, 5408, 8, 0, 7232, 8, 0, 7680, 4, 0, 9024, 4, 0, 9040, 4, 0, 9056, 4, 0, 10240, 4, 0, 10244, 4, 0, 10256, 4, 0, 10260, 4, 0, 10272, 4, 0, 10276, 4, 0, 12800, 2, 0, 12804, 2, 0, 12816, 2, 0, 12820, 2, 0, 12832, 2, 0, 12836, 2, 0, 14336, 5, 0, 14336, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + 
Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267582273429311_592_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267582273429311_592_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6c731a6f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267582273429311_592_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,82 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1152, 5, 0, 1152, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267582330499820_593_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267582330499820_593_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ae2fa0ce --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267582330499820_593_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,614 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 2)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((98 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result 
+ WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((157 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((168 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((179 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((194 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: 
{ + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((262 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((293 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((297 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((312 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i6 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (329 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (350 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (368 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (375 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((390 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((397 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (404 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (413 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (418 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (422 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (432 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i8 = 0; (i8 < 3); i8 = (i8 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((447 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((460 << 6) | (i8 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (467 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (474 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (484 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (493 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (510 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (519 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (526 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (533 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 8960, 4, 0, 8976, 4, 0, 12420, 4, 0, 12424, 4, 0, 12436, 4, 0, 12440, 4, 0, 15232, 8, 0, 15248, 8, 0, 15264, 8, 0, 15872, 8, 0, 15888, 8, 0, 15904, 8, 0, 21056, 5, 0, 21056, 5, 0, 23552, 8, 0, 27648, 8, 0, 28608, 8, 0, 28624, 8, 0, 28640, 8, 0, 30336, 3, 0, 30336, 3, 0, 30976, 1, 0, 31552, 1, 0, 32640, 1, 0, 33216, 1, 0, 33664, 3, 0, 33664, 3, 0, 34112, 3, 0, 34112, 3, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267582657384099_594_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267582657384099_594_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7be49bd6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267582657384099_594_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,329 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + 
result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + 
for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((217 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i3 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6976, 4, 0, 8400, 8, 0, 8416, 8, 0, 8432, 8, 0, 14912, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267582737619913_595_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267582737619913_595_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..03851596 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267582737619913_595_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,227 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 2)) { + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((203 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1920, 8, 0, 1936, 8, 0, 1952, 8, 0, 3584, 10, 0, 3584, 10, 0, 3600, 10, 0, 3600, 10, 0, 3616, 10, 0, 3616, 10, 0, 4544, 2, 0, 4560, 2, 0, 4576, 2, 0, 6144, 1, 0, 6160, 1, 0, 6176, 1, 0, 7040, 1, 0, 7056, 1, 0, 7072, 1, 0, 9856, 9, 0, 9856, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267582863757594_596_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267582863757594_596_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3f3e2372 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267582863757594_596_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 8, 0, 912, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267583128427841_598_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267583128427841_598_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f0096ba1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267583128427841_598_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,478 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(6)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (i1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((166 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 
0: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((274 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((285 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (321 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (334 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (347 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (356 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (361 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (375 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (385 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (394 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (399 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (410 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (415 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (422 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (426 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 51 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 13, 0, 1856, 13, 0, 1856, 13, 0, 2496, 1, 0, 5248, 4, 0, 5696, 8, 0, 7488, 1, 0, 7504, 1, 0, 7520, 1, 0, 9152, 1, 0, 9168, 1, 0, 9184, 1, 0, 26560, 6, 0, 26560, 6, 0, 27008, 14, 0, 27008, 14, 0, 27008, 14, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267583354165777_600_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267583354165777_600_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e3eddcb5 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267583354165777_600_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,180 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((39 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 2512, 2, 0, 2516, 2, 0, 2520, 2, 0, 2528, 2, 0, 2532, 2, 0, 2536, 2, 0, 4560, 2, 0, 4564, 2, 0, 4568, 2, 0, 4576, 2, 0, 4580, 2, 0, 4584, 2, 0, 7744, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267583437725259_601_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267583437725259_601_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e1384d29 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267583437725259_601_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,205 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1984, 5, 0, 1984, 5, 0, 2624, 5, 0, 2624, 5, 0, 3904, 4, 0, 5056, 10, 0, 5056, 10, 0, 7040, 8, 0, 7056, 8, 0, 7072, 8, 0, 7744, 8, 0] + - Name: _wave_op_index + 
Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267583647002305_604_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267583647002305_604_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..37c0a71e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267583647002305_604_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,191 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if ((WaveGetLaneIndex() == 1)) { + for (uint i0 = 0; (i0 < 3); 
i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 5184, 2, 0, 5200, 2, 0, 5216, 2, 0, 5632, 2, 0, 7296, 1, 0, 6912, 6, 0, 6912, 6, 0, 7936, 5, 0, 7936, 5, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267583721279313_605_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267583721279313_605_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0d0b2cda --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267583721279313_605_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (counter0 << 4)) | (i1 << 2)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 10, 0, 976, 10, 0, 992, 10, 0, 992, 10, 0, 1008, 10, 0, 1008, 10, 0, 2384, 8, 0, 2388, 8, 0, 2392, 8, 0, 2400, 8, 0, 2404, 8, 0, 2408, 8, 0, 2416, 8, 0, 2420, 8, 0, 2424, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267583829438524_606_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267583829438524_606_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d895ba1f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267583829438524_606_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,231 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((156 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((165 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 99 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1360, 9, 0, 1360, 9, 0, 2388, 10, 0, 2388, 10, 0, 2392, 10, 0, 2392, 10, 0, 2396, 10, 0, 2396, 10, 0, 3028, 5, 0, 3028, 5, 0, 3032, 5, 0, 3032, 5, 0, 3036, 5, 0, 3036, 5, 0, 4180, 10, 0, 4180, 10, 0, 4184, 10, 0, 4184, 10, 0, 4188, 10, 0, 4188, 10, 0, 4992, 1, 0, 10004, 8, 0, 10008, 8, 0, 10012, 8, 0, 10020, 8, 0, 10024, 8, 0, 10028, 8, 0, 10580, 8, 0, 10584, 8, 0, 10588, 8, 0, 10596, 8, 0, 10600, 8, 0, 10604, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267584081320297_609_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267584081320297_609_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c10e27ad --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267584081320297_609_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 5, 0, 1344, 5, 0, 1088, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - 
Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267584125172074_610_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267584125172074_610_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8d47c78f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267584125172074_610_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,842 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((90 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((101 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (266 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for 
(uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((303 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((321 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((334 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((341 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((352 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (370 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((386 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((395 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (405 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((424 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (432 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + case 2: { + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((450 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 3)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((468 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((494 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((509 << 6) | (counter7 << 4)) | (counter8 << 2)); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (524 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (533 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (537 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (544 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter9 = 0; + while ((counter9 < 3)) { + counter9 = (counter9 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((564 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i10 = 0; (i10 
< 3); i10 = (i10 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((585 << 6) | (counter9 << 4)) | (i10 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((596 << 6) | (counter9 << 4)) | (i10 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((607 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (617 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (627 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i11 = 0; (i11 < 3); i11 = (i11 + 1)) { + if ((WaveGetLaneIndex() 
== 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((644 << 6) | (i11 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((651 << 6) | (i11 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (658 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (668 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (677 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (682 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 
3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (689 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (696 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (706 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (716 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (725 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (732 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
+ break; + } + case 3: { + uint counter12 = 0; + while ((counter12 < 2)) { + counter12 = (counter12 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((753 << 6) | (counter12 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((764 << 6) | (counter12 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((775 << 6) | (counter12 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((792 << 6) | (counter12 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter12 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (799 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 99 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5776, 4, 0, 5780, 4, 0, 5784, 4, 0, 5792, 4, 0, 5796, 4, 0, 5800, 4, 0, 6480, 4, 0, 6484, 4, 0, 6488, 4, 0, 6496, 4, 0, 6500, 4, 0, 6504, 4, 0, 9936, 8, 0, 9952, 8, 0, 9968, 8, 0, 14208, 1, 0, 14848, 1, 0, 17984, 1, 0, 38160, 8, 0, 38164, 8, 0, 38168, 8, 0, 38176, 8, 0, 38180, 8, 0, 38184, 8, 0, 38192, 8, 0, 38196, 8, 0, 38200, 8, 0, 38864, 8, 0, 38880, 8, 0, 38896, 8, 0, 39488, 1, 0, 45184, 4, 0, 50704, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267584756174404_611_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267584756174404_611_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ad6c0013 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267584756174404_611_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,196 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result 
= (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((94 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((156 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((167 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 135 + - Name: expected_bit_patterns + Format: 
UInt32 + Stride: 4 + Data: [4992, 4, 0, 5008, 4, 0, 5024, 4, 0, 6020, 4, 0, 6024, 4, 0, 6028, 4, 0, 6036, 4, 0, 6040, 4, 0, 6044, 4, 0, 6052, 4, 0, 6056, 4, 0, 6060, 4, 0, 6976, 4, 0, 6992, 4, 0, 7008, 4, 0, 8400, 11, 0, 8400, 11, 0, 8400, 11, 0, 8416, 11, 0, 8416, 11, 0, 8416, 11, 0, 8432, 11, 0, 8432, 11, 0, 8432, 11, 0, 10004, 8, 0, 10008, 8, 0, 10020, 8, 0, 10024, 8, 0, 10036, 8, 0, 10040, 8, 0, 10708, 8, 0, 10712, 8, 0, 10724, 8, 0, 10728, 8, 0, 10740, 8, 0, 10744, 8, 0, 12496, 13, 0, 12496, 13, 0, 12496, 13, 0, 12512, 13, 0, 12512, 13, 0, 12512, 13, 0, 12528, 13, 0, 12528, 13, 0, 12528, 13, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267584915595748_612_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267584915595748_612_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fe872681 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267584915595748_612_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,211 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
(((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 9, 0, 1088, 9, 0, 1664, 4, 0, 3776, 4, 0, 6528, 2, 0, 7424, 10, 0, 7424, 10, 0, 9680, 10, 0, 9680, 10, 0, 9696, 10, 0, 9696, 10, 0, 9712, 10, 0, 9712, 10, 0] + - Name: _wave_op_index + Format: 
UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267584999752202_613_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267584999752202_613_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..26aa8240 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267584999752202_613_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3072, 10, 0, 3072, 10, 0, 3088, 10, 0, 3088, 10, 0, 3104, 10, 0, 3104, 10, 0, 4288, 10, 0, 4288, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + 
Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267585101452444_614_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267585101452444_614_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2ff9825d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267585101452444_614_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,309 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 
3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((159 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((166 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((235 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 4800, 4, 0, 7488, 10, 0, 7488, 10, 0, 7104, 1, 0, 8768, 9, 0, 8768, 9, 0, 10192, 8, 0, 10196, 8, 0, 10208, 8, 0, 10212, 8, 0, 10224, 8, 0, 10228, 8, 0, 11792, 1, 0, 11808, 1, 0, 11824, 1, 0, 16832, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267585219710944_615_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267585219710944_615_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9a7b404f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267585219710944_615_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,397 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((137 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((144 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(9)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((293 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((328 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((343 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((352 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((361 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((368 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((379 << 6) | (counter5 
<< 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (386 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 81 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 1, 0, 1312, 1, 0, 2448, 1, 0, 2464, 1, 0, 4688, 1, 0, 4704, 1, 0, 9920, 4, 0, 9936, 4, 0, 9952, 4, 0, 11136, 8, 0, 11152, 8, 0, 11168, 8, 0, 11968, 8, 0, 11984, 8, 0, 12000, 8, 0, 13696, 1, 0, 14336, 1, 0, 16128, 1, 0, 16704, 5, 0, 16704, 5, 0, 22548, 1, 0, 22552, 1, 0, 22564, 1, 0, 22568, 1, 0, 24272, 1, 0, 24288, 1, 0, 24704, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267585425029884_616_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267585425029884_616_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1459877f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267585425029884_616_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,490 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() 
< 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((212 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = 
(result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((217 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((268 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((286 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((295 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((310 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + if ((i5 == 1)) { + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (330 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (342 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((357 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (364 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (369 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (376 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (380 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 105 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1808, 1, 0, 1824, 1, 0, 3536, 1, 0, 3552, 1, 0, 5392, 10, 0, 5392, 10, 0, 5408, 10, 0, 5408, 10, 0, 5424, 10, 0, 5424, 10, 0, 6288, 10, 0, 6288, 10, 0, 6304, 10, 0, 6304, 10, 0, 6320, 10, 0, 6320, 10, 0, 8272, 8, 0, 8288, 8, 0, 8304, 8, 0, 11088, 8, 0, 11104, 8, 0, 11120, 8, 0, 12176, 1, 0, 12192, 1, 0, 12208, 1, 0, 13008, 1, 0, 13024, 1, 0, 13040, 1, 0, 15808, 4, 0, 17152, 8, 0, 17168, 8, 0, 17184, 8, 0, 21120, 1, 0, 23616, 4, 0, 24064, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267585760225241_617_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267585760225241_617_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..47057e6d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267585760225241_617_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,137 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1424, 9, 0, 1424, 9, 0, 1440, 9, 0, 1440, 9, 0, 3280, 8, 0, 3296, 8, 0, 4688, 9, 0, 4688, 9, 0, 4704, 9, 0, 4704, 9, 0, 5568, 6, 0, 5568, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267585820594787_618_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267585820594787_618_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e59c1847 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267585820594787_618_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,141 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 
1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 9, 0, 1216, 9, 0, 960, 4, 0, 2772, 8, 0, 2776, 8, 0, 2788, 8, 0, 2792, 8, 0, 3348, 8, 0, 3352, 8, 0, 3364, 8, 0, 3368, 8, 0, 4304, 8, 0, 4320, 8, 0, 4736, 8, 0, 6400, 13, 0, 6400, 13, 0, 6400, 13, 0, 6016, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267585888831000_619_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267585888831000_619_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d85f9377 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267585888831000_619_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,156 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - 
Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2896, 4, 0, 2912, 4, 0, 4752, 5, 0, 4752, 5, 0, 4768, 5, 0, 4768, 5, 0, 5376, 9, 0, 5376, 9, 0, 6272, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267585955429633_620_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267585955429633_620_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..726f1bec --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267585955429633_620_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3264, 14, 0, 3264, 14, 0, 3264, 14, 0, 2752, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267586232082328_623_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267586232082328_623_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2d50035c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267586232082328_623_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,92 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 2)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
} +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3136, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267586279806389_624_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267586279806389_624_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0f84db68 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267586279806389_624_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 10, 0, 1088, 10, 0, 832, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267586323426561_625_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267586323426561_625_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c2adc4dd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267586323426561_625_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,309 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((163 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((178 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4544, 5, 0, 4544, 5, 0, 7936, 1, 0, 10448, 2, 0, 10452, 2, 0, 10456, 2, 0, 10464, 2, 0, 10468, 2, 0, 10472, 2, 0, 11408, 2, 0, 11412, 2, 0, 11416, 2, 0, 11424, 2, 0, 11428, 2, 0, 11432, 2, 0, 16832, 14, 0, 16832, 14, 0, 16832, 14, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267586404706451_626_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267586404706451_626_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e9241cfd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267586404706451_626_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,575 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + case 3: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((235 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (290 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (321 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (328 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((345 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((368 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i5 == 1)) { + continue; + } + if ((i5 == 2)) { + break; + } + } + break; + } + case 2: { + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((390 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((408 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((426 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((433 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter8 == 1)) { + break; + } + } + } + break; + } + case 3: { + for (uint i9 = 0; (i9 < 3); i9 = (i9 + 1)) { + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((455 << 6) | (i9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((465 << 6) | (i9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((491 << 6) | (i9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((498 << 6) | (i9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((507 << 6) | (i9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i10 = 0; (i10 < 3); i10 = (i10 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 
3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((530 << 6) | (i9 << 4)) | (i10 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i10 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((542 << 6) | (i9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((555 << 6) | (i9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i9 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2192, 1, 0, 7104, 4, 0, 8768, 8, 0, 8784, 8, 0, 9472, 8, 0, 9488, 8, 0, 13008, 1, 0, 13024, 1, 0, 13040, 1, 0, 24976, 4, 0, 24992, 4, 0, 25008, 4, 0, 26132, 4, 0, 26148, 4, 0, 26164, 4, 0, 27284, 4, 0, 27300, 4, 0, 27316, 4, 0, 29120, 8, 0, 29136, 8, 0, 29152, 8, 0, 33920, 8, 0, 33924, 8, 0, 33928, 8, 0, 33936, 8, 0, 33940, 8, 0, 33944, 8, 0, 33952, 8, 0, 33956, 8, 0, 33960, 8, 0, 34688, 8, 0, 34704, 8, 0, 34720, 8, 0, 35520, 8, 0, 35536, 8, 0, 35552, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267586658181621_627_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267586658181621_627_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6fb033f4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267586658181621_627_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,88 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } 
+ } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2176, 10, 0, 2176, 10, 0, 2192, 10, 0, 2192, 10, 0, 2208, 10, 0, 2208, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267586786892323_629_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267586786892323_629_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3a0887d5 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267586786892323_629_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,273 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((91 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((116 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((131 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 2)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 201 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1920, 9, 0, 1920, 9, 0, 1936, 9, 0, 1936, 9, 0, 2820, 8, 0, 2824, 8, 0, 2828, 8, 0, 2836, 8, 0, 2840, 8, 0, 2844, 8, 0, 3268, 3, 0, 3268, 3, 0, 3272, 3, 0, 3272, 3, 0, 3276, 3, 0, 3276, 3, 0, 3284, 3, 0, 3284, 3, 0, 3288, 3, 0, 3288, 3, 0, 3292, 3, 0, 3292, 3, 0, 3776, 15, 0, 3776, 15, 0, 3776, 15, 0, 3776, 15, 0, 5844, 8, 0, 5848, 8, 0, 5852, 8, 0, 5860, 8, 0, 5864, 8, 0, 5868, 8, 0, 5876, 8, 0, 5880, 8, 0, 5884, 8, 0, 7444, 1, 0, 7448, 1, 0, 7452, 1, 0, 7460, 1, 0, 7464, 1, 0, 7468, 1, 0, 7476, 1, 0, 7480, 1, 0, 7484, 1, 0, 8404, 1, 0, 8408, 1, 0, 8412, 1, 0, 8420, 1, 0, 8424, 1, 0, 8428, 1, 0, 8436, 1, 0, 8440, 1, 0, 8444, 1, 0, 13520, 1, 0, 13536, 1, 0, 13552, 1, 0, 14416, 5, 0, 14416, 5, 0, 14432, 5, 0, 14432, 5, 0, 14448, 5, 0, 14448, 5, 0, 15120, 1, 0, 15136, 1, 0, 15152, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267587459472650_630_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267587459472650_630_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3aec39a0 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267587459472650_630_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,462 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 
= (counter4 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((311 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((322 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((345 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((360 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((375 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((393 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((404 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((411 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (415 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (419 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 
0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2048, 5, 0, 2048, 5, 0, 1792, 8, 0, 1408, 2, 0, 3472, 12, 0, 3472, 12, 0, 3476, 12, 0, 3476, 12, 0, 3488, 12, 0, 3488, 12, 0, 3492, 12, 0, 3492, 12, 0, 4624, 9, 0, 4624, 9, 0, 4628, 9, 0, 4628, 9, 0, 4640, 9, 0, 4640, 9, 0, 4644, 9, 0, 4644, 9, 0, 18112, 4, 0, 19920, 8, 0, 19936, 8, 0, 20624, 8, 0, 20640, 8, 0, 23040, 8, 0, 23056, 8, 0, 23072, 8, 0, 25856, 8, 0, 25872, 8, 0, 25888, 8, 0, 26304, 8, 0, 26320, 8, 0, 26336, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267587776799949_632_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267587776799949_632_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e6070ffb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267587776799949_632_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,281 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() 
== 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 5328, 8, 0, 5344, 8, 0, 7616, 1, 0, 7632, 1, 0, 7648, 1, 0, 10944, 9, 0, 10944, 9, 0, 12800, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + 
GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267587892121364_633_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267587892121364_633_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c0bef3fd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267587892121364_633_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,389 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint 
counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || 
(WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((274 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 
== 1)) { + break; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((292 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((318 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (336 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (354 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (365 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [12032, 2, 0, 12352, 4, 0, 12800, 12, 0, 12800, 12, 0, 13696, 9, 0, 13696, 9, 0, 14912, 2, 0, 14928, 2, 0, 21504, 10, 0, 21504, 10, 0, 22656, 8, 0, 23360, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267588057495954_635_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267588057495954_635_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..064ceb69 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267588057495954_635_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,166 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((76 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((96 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((105 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((114 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 1984, 1, 0, 2880, 4, 0, 3920, 8, 0, 6160, 8, 0, 6164, 8, 0, 6168, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267588117938380_636_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267588117938380_636_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1e79109f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267588117938380_636_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4096, 9, 0, 4096, 9, 0, 3712, 4, 0, 3328, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267588178174865_637_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267588178174865_637_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3de1e3fd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267588178174865_637_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + 
- Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267588228784806_638_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267588228784806_638_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f3b14b44 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267588228784806_638_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,238 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1792, 9, 0, 1792, 9, 0, 8128, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267588287969812_639_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267588287969812_639_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3de1e3fd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267588287969812_639_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + 
- Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267588330792045_640_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267588330792045_640_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5eb92101 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267588330792045_640_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,384 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i1 << 4)); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (266 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((288 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((305 << 6) | (counter3 << 4)) | (counter4 << 
2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((314 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((323 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((341 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 7424, 2, 0, 7440, 2, 0, 8000, 2, 0, 8320, 4, 0, 12800, 8, 0, 16128, 9, 0, 16128, 9, 0, 17024, 4, 0, 18448, 13, 0, 18448, 13, 0, 18448, 13, 0, 19540, 5, 0, 19540, 5, 0, 20692, 5, 0, 20692, 5, 0, 21840, 10, 0, 21840, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + 
Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267588433419011_641_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267588433419011_641_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ef83dff5 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267588433419011_641_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,187 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 9, 0, 1600, 9, 0, 1616, 9, 0, 1616, 9, 0, 4160, 8, 0, 4176, 8, 0, 5248, 1, 0, 5264, 1, 0, 9792, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 
4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267588506139232_642_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267588506139232_642_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..568fa3ac --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267588506139232_642_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,459 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 2)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((155 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((213 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((231 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((242 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((255 
<< 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((266 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((273 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((291 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((301 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((311 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((320 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((329 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((339 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((348 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((352 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((361 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (384 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (410 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (427 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((443 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((452 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (457 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1472, 1, 0, 6656, 2, 0, 11520, 4, 0, 13636, 4, 0, 13640, 4, 0, 13652, 4, 0, 13656, 4, 0, 13668, 4, 0, 13672, 4, 0, 16324, 8, 0, 16328, 8, 0, 16340, 8, 0, 16344, 8, 0, 16356, 8, 0, 16360, 8, 0, 17028, 8, 0, 17032, 8, 0, 17044, 8, 0, 17048, 8, 0, 17060, 8, 0, 17064, 8, 0, 17472, 8, 0, 17488, 8, 0, 17504, 8, 0, 18640, 13, 0, 18640, 13, 0, 18640, 13, 0, 18656, 13, 0, 18656, 13, 0, 18656, 13, 0, 18672, 13, 0, 18672, 13, 0, 18672, 13, 0, 19920, 1, 0, 19936, 1, 0, 19952, 1, 0, 21072, 4, 0, 21088, 4, 0, 21104, 4, 0, 21712, 4, 0, 21728, 4, 0, 21744, 4, 0, 23120, 4, 0, 23136, 4, 0, 23152, 4, 0, 28944, 2, 0, 28960, 2, 0, 29248, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267589932002890_644_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267589932002890_644_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..70dd0163 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267589932002890_644_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = 
(result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3008, 9, 0, 3008, 9, 0, 2624, 6, 0, 2624, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267589988820473_645_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267589988820473_645_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9753ddcb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267589988820473_645_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,89 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 1, 0, 1312, 1, 0, 2048, 5, 0, 2048, 5, 0] + - Name: _wave_op_index + 
Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267590046420685_646_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267590046420685_646_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7c9e75be --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267590046420685_646_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,370 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((126 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((304 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((313 << 6) | (counter4 << 4)) | (i5 << 
2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((318 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((325 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((336 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((343 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 1, 0, 1232, 1, 0, 2624, 1, 0, 2628, 1, 0, 2640, 1, 0, 2644, 1, 0, 6592, 2, 0, 10112, 2, 0, 10128, 2, 0, 17280, 4, 0, 18320, 8, 
0, 18336, 8, 0, 18352, 8, 0, 20816, 8, 0, 20820, 8, 0, 20824, 8, 0, 20832, 8, 0, 20836, 8, 0, 20840, 8, 0, 20848, 8, 0, 20852, 8, 0, 20856, 8, 0, 21520, 8, 0, 21524, 8, 0, 21528, 8, 0, 21536, 8, 0, 21540, 8, 0, 21544, 8, 0, 21552, 8, 0, 21556, 8, 0, 21560, 8, 0, 21968, 8, 0, 21984, 8, 0, 22000, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267590175336538_647_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267590175336538_647_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..53f3349c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267590175336538_647_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,308 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) 
{ + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | 
(counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((249 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 9, 0, 1088, 9, 0, 1728, 9, 0, 1728, 9, 0, 4224, 1, 0, 6400, 1, 0, 13696, 4, 0, 13712, 4, 0, 15104, 8, 0, 15120, 8, 0, 15936, 8, 0, 15952, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267590252723819_648_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267590252723819_648_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4a1f7957 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267590252723819_648_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,515 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* output log: one (id word, ballot.x, ballot.y) triple per recorded wave op */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] = next free index in _participant_bit, advanced by 3 with InterlockedAdd */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* every guarded wave op reserves a 3-word slot, then stores its id word ((site id << 6) | (loop counter << 4)) and the WaveActiveBallot(1) mask */ + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() 
== 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (294 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (301 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (333 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (343 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (354 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(365 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (372 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } /* no break: case 2 falls through into case 3; expected_bit_patterns lists 379 << 6 with mask 12 */ + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (379 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (383 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (393 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((410 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((424 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((433 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (459 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((475 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((482 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 
1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (502 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 57 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2944, 7, 0, 2944, 7, 0, 2944, 7, 0, 2560, 8, 0, 10048, 1, 0, 12736, 1, 0, 24256, 12, 0, 24256, 12, 0, 25152, 9, 0, 25152, 9, 0, 26240, 2, 0, 26256, 2, 0, 26272, 2, 0, 27136, 2, 0, 27152, 2, 0, 27168, 2, 0, 27712, 2, 0, 27728, 2, 0, 27744, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267590331233151_649_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267590331233151_649_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..326d5359 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267590331233151_649_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,273 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* output log: one (id word, ballot.x, ballot.y) triple per recorded wave op */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] = next free index in _participant_bit, advanced by 3 with InterlockedAdd */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* every guarded wave op reserves a 3-word slot, then stores its id word ((site id << 6) | (loop counter << 4)) and the WaveActiveBallot(1) mask */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } /* no break: case 0 falls through into case 1; expected_bit_patterns lists the 49 << 6 records with mask 1 */ + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: 
Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 3152, 1, 0, 3168, 1, 0, 3184, 1, 0, 8784, 4, 0, 8800, 4, 0, 10432, 8, 0, 10448, 8, 0, 15168, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267590418494737_650_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267590418494737_650_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..da02beda --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267590418494737_650_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,301 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* output log: one (id word, ballot.x, ballot.y) triple per recorded wave op */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] = next free index in _participant_bit, advanced by 3 with InterlockedAdd */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* every guarded wave op reserves a 3-word slot, then stores its id word ((site id << 6) | (outer loop counter << 4) | (inner loop counter << 2)) and the WaveActiveBallot(1) mask */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 
0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((82 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((143 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (counter2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((199 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((214 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 3600, 8, 0, 3616, 8, 0, 4304, 2, 0, 4320, 2, 0, 5840, 10, 0, 5840, 10, 0, 5856, 10, 0, 5856, 10, 0, 7184, 1, 0, 7200, 1, 0, 10256, 1, 0, 10272, 1, 0, 12752, 2, 0, 12756, 2, 0, 12768, 2, 0, 12772, 2, 0, 13712, 2, 0, 13716, 2, 0, 13728, 2, 0, 13732, 2, 0, 17280, 8, 0] + - Name: _wave_op_index + Format: 
UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267590536399611_651_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267590536399611_651_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..be8c76a0 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267590536399611_651_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,314 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 57 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 9, 0, 1088, 9, 0, 2496, 9, 0, 2496, 9, 0, 2512, 9, 0, 2512, 9, 0, 2528, 9, 0, 2528, 9, 0, 5824, 9, 0, 5824, 9, 0, 6864, 4, 0, 6880, 4, 0, 6896, 4, 0, 9024, 1, 0, 8640, 12, 0, 8640, 12, 0, 8384, 2, 0, 9664, 1, 0, 17152, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267590617002526_652_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267590617002526_652_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2c3cb7f0 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267590617002526_652_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,204 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) 
{ + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + 
WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3584, 4, 0, 4416, 5, 0, 4416, 5, 0, 6528, 13, 0, 6528, 13, 0, 6528, 13, 0, 6272, 2, 0, 7680, 4, 0, 8848, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267590739045420_654_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267590739045420_654_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..76c39e8f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267590739045420_654_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,219 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml 
+--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 3136, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267590794628552_655_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267590794628552_655_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b132d643 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267590794628552_655_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,338 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if ((i0 == 1)) { + continue; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) 
{ + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((218 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((234 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 
0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((244 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((273 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((291 << 6) | (i6 << 4)) | (counter7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((302 << 6) | (i6 << 4)) | (counter7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((313 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4160, 1, 0, 5376, 9, 0, 5376, 9, 0, 5392, 9, 0, 5392, 9, 0, 7936, 8, 0, 11328, 2, 0, 11344, 2, 0, 17472, 5, 0, 17472, 5, 0, 17488, 5, 0, 17488, 5, 0, 18628, 5, 0, 18628, 5, 0, 18632, 5, 0, 18632, 5, 0, 18644, 5, 0, 18644, 5, 0, 18648, 5, 0, 18648, 5, 0, 20032, 5, 0, 20032, 5, 0, 20048, 5, 0, 20048, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267591587680872_659_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267591587680872_659_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ad82c331 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267591587680872_659_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,235 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + 
} + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i1 
<< 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: 
[1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 1, 0, 1664, 1, 0, 4096, 4, 0, 4544, 8, 0, 5184, 1, 0, 6080, 2, 0, 10048, 4, 0, 10496, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267591649809560_660_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267591649809560_660_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..25485ee4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267591649809560_660_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,382 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) 
|| (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 
2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 3)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (294 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (303 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (325 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 51 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2388, 1, 0, 2392, 1, 0, 2404, 1, 0, 
2408, 1, 0, 2420, 1, 0, 2424, 1, 0, 2964, 1, 0, 2968, 1, 0, 2980, 1, 0, 2984, 1, 0, 2996, 1, 0, 3000, 1, 0, 12288, 8, 0, 15360, 1, 0, 17600, 4, 0, 19136, 4, 0, 20800, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267591811117670_661_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267591811117670_661_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cfb076d0 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267591811117670_661_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,201 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 63 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 9, 0, 1216, 9, 0, 2384, 1, 0, 2400, 1, 0, 2416, 1, 0, 3600, 8, 0, 3604, 8, 0, 3608, 8, 0, 3616, 8, 0, 3620, 8, 0, 3624, 8, 0, 3632, 8, 0, 3636, 8, 0, 3640, 8, 0, 4176, 1, 0, 4192, 1, 0, 4208, 1, 0, 4864, 8, 0, 6208, 4, 0, 10176, 5, 0, 10176, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267592219025904_663_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267592219025904_663_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b48673e1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267592219025904_663_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,196 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((89 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((96 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((124 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((136 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((151 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: 
[1024, 10, 0, 1024, 10, 0, 1040, 10, 0, 1040, 10, 0, 1056, 10, 0, 1056, 10, 0, 2436, 7, 0, 2436, 7, 0, 2436, 7, 0, 2440, 7, 0, 2440, 7, 0, 2440, 7, 0, 2444, 7, 0, 2444, 7, 0, 2444, 7, 0, 2452, 7, 0, 2452, 7, 0, 2452, 7, 0, 2456, 7, 0, 2456, 7, 0, 2456, 7, 0, 2460, 7, 0, 2460, 7, 0, 2460, 7, 0, 2468, 7, 0, 2468, 7, 0, 2468, 7, 0, 2472, 7, 0, 2472, 7, 0, 2472, 7, 0, 2476, 7, 0, 2476, 7, 0, 2476, 7, 0, 3076, 1, 0, 3080, 1, 0, 3084, 1, 0, 3092, 1, 0, 3096, 1, 0, 3100, 1, 0, 3108, 1, 0, 3112, 1, 0, 3116, 1, 0, 9668, 5, 0, 9668, 5, 0, 9672, 5, 0, 9672, 5, 0, 9676, 5, 0, 9676, 5, 0, 9684, 5, 0, 9684, 5, 0, 9688, 5, 0, 9688, 5, 0, 9692, 5, 0, 9692, 5, 0, 9700, 5, 0, 9700, 5, 0, 9704, 5, 0, 9704, 5, 0, 9708, 5, 0, 9708, 5, 0, 10240, 10, 0, 10240, 10, 0, 10256, 10, 0, 10256, 10, 0, 10272, 10, 0, 10272, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267593081155594_666_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267593081155594_666_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fc9ef3df --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267593081155594_666_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,207 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((50 << 6) | (i0 << 4)) | (counter1 << 2)) | counter2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (i0 << 4)) | (counter1 << 
2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((85 << 6) | (i0 << 4)) | (counter1 << 2)) | i3); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((95 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((104 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if ((i0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 13, 0, 1152, 13, 0, 1152, 13, 0, 1168, 13, 0, 1168, 13, 0, 1168, 13, 0, 3205, 9, 0, 3205, 9, 0, 3206, 9, 0, 3206, 9, 0, 3207, 9, 0, 3207, 9, 0, 3221, 9, 0, 3221, 9, 0, 3222, 9, 0, 3222, 9, 0, 3223, 9, 0, 3223, 9, 0, 3908, 9, 0, 3908, 9, 0, 3924, 9, 0, 3924, 9, 0, 4484, 2, 0, 4500, 2, 0, 6660, 1, 0, 6676, 1, 0, 9232, 8, 0, 9248, 8, 0, 12560, 8, 0, 12576, 8, 0, 13520, 8, 0, 13536, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267593258844014_667_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267593258844014_667_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7f7fdb90 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267593258844014_667_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,472 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; 
+ while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { 
+ result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((262 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((271 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((282 << 6) | (i3 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((309 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (313 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 1: { + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((330 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((348 << 6) | (i7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((361 << 6) | (i7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter8 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((371 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (376 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (383 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 14720, 8, 0, 18048, 1, 0, 18064, 1, 0, 24064, 4, 0, 24512, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267593543231447_670_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267593543231447_670_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6b115406 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267593543231447_670_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,101 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (i0 << 4)) | (counter1 << 
2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 81 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 9, 0, 1152, 9, 0, 1168, 9, 0, 1168, 9, 0, 1184, 9, 0, 1184, 9, 0, 2432, 13, 0, 2432, 13, 0, 2432, 13, 0, 2448, 13, 0, 2448, 13, 0, 2448, 13, 0, 2464, 13, 0, 2464, 13, 0, 2464, 13, 0, 3588, 4, 0, 3592, 4, 0, 3604, 4, 0, 3608, 4, 0, 3620, 4, 0, 3624, 4, 0, 4288, 9, 0, 4288, 9, 0, 4304, 9, 0, 4304, 9, 0, 4320, 9, 0, 4320, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267593639265920_671_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267593639265920_671_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..92391aec --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267593639265920_671_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,426 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() 
>= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter0 
<< 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((260 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((269 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((287 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 
3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((305 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((309 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((316 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((323 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((348 << 6) | (i2 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((358 << 6) | (i2 << 4)) | (i4 << 2)); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((375 << 6) | (i2 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + } + if ((i2 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 195 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 9, 0, 1296, 9, 0, 1312, 9, 0, 1312, 9, 0, 2192, 1, 0, 2208, 1, 0, 4688, 8, 0, 4704, 8, 0, 5584, 8, 0, 5600, 8, 0, 6288, 8, 0, 6304, 8, 0, 6992, 9, 0, 6992, 9, 0, 7008, 9, 0, 7008, 9, 0, 8640, 2, 0, 9552, 2, 0, 9568, 2, 0, 11904, 4, 0, 12544, 9, 0, 12544, 9, 0, 14848, 4, 0, 20224, 2, 0, 20240, 2, 0, 20256, 2, 0, 20672, 4, 0, 20688, 4, 0, 20704, 4, 0, 22272, 4, 0, 22276, 4, 0, 22280, 4, 0, 22288, 4, 0, 22292, 4, 0, 22296, 4, 0, 22304, 4, 0, 22308, 4, 0, 22312, 4, 0, 24000, 13, 0, 24000, 13, 0, 24000, 13, 0, 24004, 13, 0, 24004, 13, 0, 24004, 13, 0, 24008, 13, 0, 24008, 13, 0, 24008, 13, 0, 24016, 13, 0, 24016, 13, 0, 24016, 13, 0, 24020, 13, 0, 24020, 13, 0, 24020, 13, 0, 24024, 13, 0, 24024, 13, 0, 24024, 13, 0, 24032, 13, 0, 24032, 13, 0, 24032, 13, 0, 24036, 13, 0, 24036, 13, 0, 24036, 13, 0, 24040, 13, 0, 24040, 13, 0, 24040, 13, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267594437462223_672_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267594437462223_672_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6ed39141 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267594437462223_672_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,260 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 
1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i2 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 6400, 4, 0, 8256, 8, 0, 10176, 9, 0, 10176, 9, 0, 11840, 9, 0, 11840, 9, 0, 11856, 9, 0, 11856, 9, 0, 11872, 9, 0, 11872, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - 
Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267594614095085_674_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267594614095085_674_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d0030a3b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267594614095085_674_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,226 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) 
| (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2192, 1, 0, 2208, 1, 0, 5504, 4, 0, 5520, 4, 0, 5536, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267594725178144_676_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267594725178144_676_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..65149398 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267594725178144_676_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,201 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((152 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((167 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((174 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4800, 8, 0, 
4816, 8, 0, 5760, 10, 0, 5760, 10, 0, 5776, 10, 0, 5776, 10, 0, 7936, 5, 0, 7936, 5, 0, 7952, 5, 0, 7952, 5, 0, 9728, 4, 0, 9744, 4, 0, 10688, 4, 0, 10692, 4, 0, 10696, 4, 0, 10704, 4, 0, 10708, 4, 0, 10712, 4, 0, 12416, 4, 0, 12432, 4, 0, 12992, 5, 0, 12992, 5, 0, 13008, 5, 0, 13008, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267594827972944_677_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267594827972944_677_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e6fd79eb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267594827972944_677_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,132 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 3)) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((counter0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 8, 0, 6032, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267594886557944_678_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267594886557944_678_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8a4fe8be --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267594886557944_678_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,386 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((33 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint 
counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((192 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((210 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((219 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((226 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i2 == 1)) { + continue; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((260 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((275 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (290 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (304 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (311 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (315 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 117 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 9, 0, 2112, 9, 0, 2116, 9, 0, 2116, 9, 0, 2120, 9, 0, 2120, 9, 0, 2128, 9, 0, 2128, 9, 0, 2132, 9, 0, 2132, 9, 0, 2136, 9, 0, 2136, 9, 0, 3072, 9, 0, 3072, 9, 0, 3076, 9, 0, 3076, 9, 0, 3080, 9, 0, 3080, 9, 0, 3088, 9, 0, 3088, 9, 0, 3092, 9, 0, 3092, 9, 0, 3096, 9, 0, 3096, 9, 0, 3840, 1, 0, 3856, 1, 0, 7616, 4, 0, 9856, 9, 0, 9856, 9, 0, 9600, 4, 0, 17600, 2, 0, 17604, 2, 0, 17608, 2, 0, 17616, 2, 0, 17620, 2, 0, 17624, 2, 0, 19456, 4, 0, 19904, 12, 0, 19904, 12, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + 
Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267595253958646_679_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267595253958646_679_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e83b6b4e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267595253958646_679_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,150 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4032, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267595312390540_680_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267595312390540_680_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f7702f6b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267595312390540_680_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,189 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((100 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: 
main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1728, 8, 0, 2432, 8, 0, 6400, 9, 0, 6400, 9, 0, 6416, 9, 0, 6416, 9, 0, 10192, 4, 0, 10208, 4, 0, 11152, 4, 0, 11168, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267595505056744_682_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267595505056744_682_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a0962526 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267595505056744_682_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,262 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((76 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + } + case 2: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((120 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter3 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((WaveGetLaneIndex() < 
1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: 
UInt32 + Stride: 4 + Data: [1088, 9, 0, 1088, 9, 0, 8768, 1, 0, 9344, 1, 0, 10496, 1, 0, 11136, 1, 0, 12480, 15, 0, 12480, 15, 0, 12480, 15, 0, 12480, 15, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267595581459724_683_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267595581459724_683_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..64c44c62 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267595581459724_683_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,269 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = ((32 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((68 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((75 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((82 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((136 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((157 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 129 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3652, 8, 0, 3656, 8, 0, 3660, 8, 0, 3668, 8, 0, 3672, 8, 0, 3676, 8, 0, 4356, 8, 0, 4360, 8, 0, 4364, 8, 0, 4372, 8, 0, 4376, 8, 0, 4380, 8, 0, 5252, 8, 0, 5256, 8, 0, 5260, 8, 0, 5268, 8, 0, 5272, 8, 0, 5276, 8, 0, 6208, 8, 0, 6224, 8, 0, 7808, 5, 0, 7808, 5, 0, 7824, 5, 0, 7824, 5, 0, 9348, 4, 0, 9352, 4, 0, 9364, 4, 0, 9368, 4, 0, 10052, 1, 0, 10056, 1, 0, 10068, 1, 0, 10072, 1, 0, 10500, 4, 0, 10504, 4, 0, 10516, 4, 0, 10520, 4, 0, 11072, 5, 0, 11072, 5, 0, 12560, 1, 0, 12576, 1, 0, 12592, 1, 0, 14336, 4, 0, 14784, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267595910890889_685_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267595910890889_685_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e339ebc3 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267595910890889_685_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 4, 0, 1920, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267595958345980_686_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267595958345980_686_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7810ecbe --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267595958345980_686_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,182 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 
<< 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((88 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 57 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 1, 0, 1360, 1, 0, 4224, 1, 0, 4240, 1, 0, 5184, 1, 0, 5188, 1, 0, 5192, 1, 0, 5200, 1, 0, 5204, 1, 0, 5208, 1, 0, 5632, 1, 0, 5636, 1, 0, 5640, 1, 0, 5648, 1, 0, 5652, 1, 0, 5656, 1, 0, 7104, 1, 0, 7424, 4, 0, 7872, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267596038740805_687_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267596038740805_687_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..55a4e485 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267596038740805_687_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + 
WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2176, 13, 0, 2176, 13, 0, 2176, 13, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267596082387060_688_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267596082387060_688_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..13080dc7 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267596082387060_688_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,786 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + 
} + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = 
(result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (counter2 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((242 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((257 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((WaveGetLaneIndex() == 3)) { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((293 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (308 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (335 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((358 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (368 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (377 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter6 = 0; + while 
((counter6 < 3)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((391 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((401 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((408 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter6 == 2)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (421 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (431 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (440 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (445 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (452 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (456 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (467 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (477 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (486 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (491 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (498 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (509 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (527 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((544 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (554 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (563 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (568 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (586 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (597 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (608 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (612 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (619 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (629 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (638 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (651 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (658 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (673 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (682 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (687 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 93 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2240, 1, 0, 2256, 1, 0, 9088, 1, 0, 9408, 7, 0, 9408, 7, 0, 9408, 7, 0, 10320, 8, 0, 10336, 8, 0, 10352, 8, 0, 15508, 8, 0, 15512, 8, 0, 15524, 8, 0, 15528, 8, 0, 15540, 8, 0, 15544, 8, 0, 16468, 8, 0, 16472, 8, 0, 16484, 8, 0, 16488, 8, 0, 16500, 8, 0, 16504, 8, 0, 20672, 1, 0, 23552, 1, 0, 29888, 8, 0, 31872, 8, 0, 32576, 8, 0, 36352, 4, 0, 39616, 4, 0, 40256, 9, 0, 
// Wave-participation tracking shader (generated test case 689).
//
// Every instrumented wave operation appends one 3-word record per
// participating lane:
//   word 0: (opId << 6) | (outerLoopIter << 4) | (innerLoopIter << 2)
//   word 1: WaveActiveBallot(1).x  (mask of the lanes active at that point)
//   word 2: WaveActiveBallot(1).y
// Slots are reserved with InterlockedAdd on _wave_op_index[0], so the order of
// records in _participant_bit is not deterministic.
//
// The control flow below is intentionally redundant (dead branches, no-op
// `continue`); it is the divergence shape under test and must not be
// simplified.
// NOTE(review): lane comments assume the four threads occupy lanes 0-3 of a
// single wave, which is what expected_bit_patterns in pipeline.yaml encodes.

// Element type restored: both buffers hold 32-bit unsigned words
// (pipeline.yaml declares Format: UInt32, Stride: 4).
RWStructuredBuffer<uint> _participant_bit : register(u0);
RWStructuredBuffer<uint> _wave_op_index : register(u1);

[numthreads(4, 1, 1)]
void main(uint3 tid : SV_DispatchThreadID) {
  uint result = 0;
  switch ((WaveGetLaneIndex() % 3)) {
  case 0: {
    // Lanes 0 and 3 enter; only the even lane (0) passes the inner test.
    for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) {
      uint counter1 = 0;
      while ((counter1 < 2)) {
        // Incremented before use, so the recorded inner index is 1..2.
        counter1 = (counter1 + 1);
        if (((WaveGetLaneIndex() & 1) == 0)) {
          result = (result + WaveActiveMin((WaveGetLaneIndex() + 3)));
          uint temp = 0;
          InterlockedAdd(_wave_op_index[0], 3, temp);
          _participant_bit[temp] = (((28 << 6) | (i0 << 4)) | (counter1 << 2));
          uint4 ballot = WaveActiveBallot(1);
          _participant_bit[(temp + 1)] = ballot.x;
          _participant_bit[(temp + 2)] = ballot.y;
        }
      }
    }
    break;
  }
  case 1: {
    // Only lane 1 reaches this case, so the `< 1` guard is never satisfied
    // and op 49 produces no records.
    if ((WaveGetLaneIndex() < 1)) {
      uint counter2 = 0;
      while ((counter2 < 3)) {
        counter2 = (counter2 + 1);
        if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) {
          result = (result + WaveActiveSum(result));
          uint temp = 0;
          InterlockedAdd(_wave_op_index[0], 3, temp);
          _participant_bit[temp] = ((49 << 6) | (counter2 << 4));
          uint4 ballot = WaveActiveBallot(1);
          _participant_bit[(temp + 1)] = ballot.x;
          _participant_bit[(temp + 2)] = ballot.y;
        }
      }
    }
    break;
  }
  case 2: {
    // Lane 2 alone.
    if (true) {
      result = (result + WaveActiveSum(3));
      uint temp = 0;
      InterlockedAdd(_wave_op_index[0], 3, temp);
      _participant_bit[temp] = (54 << 6);
      uint4 ballot = WaveActiveBallot(1);
      _participant_bit[(temp + 1)] = ballot.x;
      _participant_bit[(temp + 2)] = ballot.y;
    }
    break;
  }
  }
  switch ((WaveGetLaneIndex() % 3)) {
  case 0: {
    // Lanes 0 and 3 both pass, so op 64 yields two identical records.
    if ((WaveGetLaneIndex() < 8)) {
      result = (result + WaveActiveSum(1));
      uint temp = 0;
      InterlockedAdd(_wave_op_index[0], 3, temp);
      _participant_bit[temp] = (64 << 6);
      uint4 ballot = WaveActiveBallot(1);
      _participant_bit[(temp + 1)] = ballot.x;
      _participant_bit[(temp + 2)] = ballot.y;
    }
    break;
  }
  case 1: {
    // Lane 1 is odd, so op 73 produces no records.
    if (((WaveGetLaneIndex() % 2) == 0)) {
      result = (result + WaveActiveSum(2));
      uint temp = 0;
      InterlockedAdd(_wave_op_index[0], 3, temp);
      _participant_bit[temp] = (73 << 6);
      uint4 ballot = WaveActiveBallot(1);
      _participant_bit[(temp + 1)] = ballot.x;
      _participant_bit[(temp + 2)] = ballot.y;
    }
    break;
  }
  case 2: {
    // Lane 2 alone: only op 90 fires (once per iteration).
    for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) {
      if (((WaveGetLaneIndex() & 1) == 0)) {
        result = (result + WaveActiveSum(WaveGetLaneIndex()));
        uint temp = 0;
        InterlockedAdd(_wave_op_index[0], 3, temp);
        _participant_bit[temp] = ((90 << 6) | (i3 << 4));
        uint4 ballot = WaveActiveBallot(1);
        _participant_bit[(temp + 1)] = ballot.x;
        _participant_bit[(temp + 2)] = ballot.y;
      }
      if ((WaveGetLaneIndex() >= 2)) {
        if ((WaveGetLaneIndex() >= 3)) {
          result = (result + WaveActiveMin(result));
          uint temp = 0;
          InterlockedAdd(_wave_op_index[0], 3, temp);
          _participant_bit[temp] = ((100 << 6) | (i3 << 4));
          uint4 ballot = WaveActiveBallot(1);
          _participant_bit[(temp + 1)] = ballot.x;
          _participant_bit[(temp + 2)] = ballot.y;
        }
      }
      if (((WaveGetLaneIndex() & 1) == 1)) {
        result = (result + WaveActiveMin((WaveGetLaneIndex() + 1)));
        uint temp = 0;
        InterlockedAdd(_wave_op_index[0], 3, temp);
        _participant_bit[temp] = ((111 << 6) | (i3 << 4));
        uint4 ballot = WaveActiveBallot(1);
        _participant_bit[(temp + 1)] = ballot.x;
        _participant_bit[(temp + 2)] = ballot.y;
      }
      // No-op `continue` at the tail of the loop body; kept as generated.
      if ((i3 == 1)) {
        continue;
      }
    }
    break;
  }
  }
}
// Wave-participation tracking shader (generated test case 690).
//
// Every instrumented wave operation appends one 3-word record per
// participating lane:
//   word 0: (opId << 6)
//   word 1: WaveActiveBallot(1).x  (mask of the lanes active at that point)
//   word 2: WaveActiveBallot(1).y
// Slots are reserved with InterlockedAdd on _wave_op_index[0], so the order of
// records in _participant_bit is not deterministic.
//
// The branch conditions are intentionally redundant; they are the divergence
// shape under test and must not be simplified.
// NOTE(review): lane comments assume the four threads occupy lanes 0-3 of a
// single wave, which is what expected_bit_patterns in pipeline.yaml encodes.

// Element type restored: both buffers hold 32-bit unsigned words
// (pipeline.yaml declares Format: UInt32, Stride: 4).
RWStructuredBuffer<uint> _participant_bit : register(u0);
RWStructuredBuffer<uint> _wave_op_index : register(u1);

[numthreads(4, 1, 1)]
void main(uint3 tid : SV_DispatchThreadID) {
  uint result = 0;
  switch ((WaveGetLaneIndex() % 3)) {
  case 0: {
    // Lanes 0 and 3 both pass, so op 9 yields two identical records.
    if ((WaveGetLaneIndex() < 8)) {
      result = (result + WaveActiveSum(1));
      uint temp = 0;
      InterlockedAdd(_wave_op_index[0], 3, temp);
      _participant_bit[temp] = (9 << 6);
      uint4 ballot = WaveActiveBallot(1);
      _participant_bit[(temp + 1)] = ballot.x;
      _participant_bit[(temp + 2)] = ballot.y;
    }
    break;
  }
  case 1: {
    // Lane 1 is odd, so op 18 produces no records.
    if (((WaveGetLaneIndex() % 2) == 0)) {
      result = (result + WaveActiveSum(2));
      uint temp = 0;
      InterlockedAdd(_wave_op_index[0], 3, temp);
      _participant_bit[temp] = (18 << 6);
      uint4 ballot = WaveActiveBallot(1);
      _participant_bit[(temp + 1)] = ballot.x;
      _participant_bit[(temp + 2)] = ballot.y;
    }
    break;
  }
  case 2: {
    // Lane 2 alone.
    if (true) {
      result = (result + WaveActiveSum(3));
      uint temp = 0;
      InterlockedAdd(_wave_op_index[0], 3, temp);
      _participant_bit[temp] = (23 << 6);
      uint4 ballot = WaveActiveBallot(1);
      _participant_bit[(temp + 1)] = ballot.x;
      _participant_bit[(temp + 2)] = ballot.y;
    }
    break;
  }
  }
}
// Wave-participation tracking shader (generated test case 691).
//
// Every instrumented wave operation appends one 3-word record per
// participating lane:
//   word 0: (opId << 6) | (outerLoopIter << 4) | (innerLoopIter << 2)
//   word 1: WaveActiveBallot(1).x  (mask of the lanes active at that point)
//   word 2: WaveActiveBallot(1).y
// Slots are reserved with InterlockedAdd on _wave_op_index[0], so the order of
// records in _participant_bit is not deterministic.
//
// The control flow below is intentionally redundant (contradictory nested
// conditions, repeated terms); it is the divergence shape under test and must
// not be simplified.
// NOTE(review): lane comments assume the four threads occupy lanes 0-3 of a
// single wave, which is what expected_bit_patterns in pipeline.yaml encodes.

// Element type restored: both buffers hold 32-bit unsigned words
// (pipeline.yaml declares Format: UInt32, Stride: 4).
RWStructuredBuffer<uint> _participant_bit : register(u0);
RWStructuredBuffer<uint> _wave_op_index : register(u1);

[numthreads(4, 1, 1)]
void main(uint3 tid : SV_DispatchThreadID) {
  uint result = 0;
  uint counter0 = 0;
  while ((counter0 < 2)) {
    // Incremented before use, so the recorded outer index is 1..2.
    counter0 = (counter0 + 1);
    if (((WaveGetLaneIndex() & 1) == 0)) {
      result = (result + WaveActiveSum(result));
      uint temp = 0;
      InterlockedAdd(_wave_op_index[0], 3, temp);
      _participant_bit[temp] = ((15 << 6) | (counter0 << 4));
      uint4 ballot = WaveActiveBallot(1);
      _participant_bit[(temp + 1)] = ballot.x;
      _participant_bit[(temp + 2)] = ballot.y;
    }
    uint counter1 = 0;
    while ((counter1 < 3)) {
      // Incremented before use, so the recorded inner index is 1..3.
      counter1 = (counter1 + 1);
      if (((WaveGetLaneIndex() & 1) == 0)) {
        result = (result + WaveActiveSum((WaveGetLaneIndex() + 2)));
        uint temp = 0;
        InterlockedAdd(_wave_op_index[0], 3, temp);
        _participant_bit[temp] = (((33 << 6) | (counter0 << 4)) | (counter1 << 2));
        uint4 ballot = WaveActiveBallot(1);
        _participant_bit[(temp + 1)] = ballot.x;
        _participant_bit[(temp + 2)] = ballot.y;
      }
      if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) {
        // Odd lanes only; the even-lane test below can never pass, so op 61
        // produces no records.
        if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) {
          result = (result + WaveActiveSum((WaveGetLaneIndex() + 3)));
          uint temp = 0;
          InterlockedAdd(_wave_op_index[0], 3, temp);
          _participant_bit[temp] = (((61 << 6) | (counter0 << 4)) | (counter1 << 2));
          uint4 ballot = WaveActiveBallot(1);
          _participant_bit[(temp + 1)] = ballot.x;
          _participant_bit[(temp + 2)] = ballot.y;
        }
      } else {
        // Even lanes only; both odd-lane tests fail, so ops 70 and 79
        // produce no records.
        if (((WaveGetLaneIndex() & 1) == 1)) {
          result = (result + WaveActiveSum(result));
          uint temp = 0;
          InterlockedAdd(_wave_op_index[0], 3, temp);
          _participant_bit[temp] = (((70 << 6) | (counter0 << 4)) | (counter1 << 2));
          uint4 ballot = WaveActiveBallot(1);
          _participant_bit[(temp + 1)] = ballot.x;
          _participant_bit[(temp + 2)] = ballot.y;
        }
        if (((WaveGetLaneIndex() & 1) == 1)) {
          result = (result + WaveActiveSum(result));
          uint temp = 0;
          InterlockedAdd(_wave_op_index[0], 3, temp);
          _participant_bit[temp] = (((79 << 6) | (counter0 << 4)) | (counter1 << 2));
          uint4 ballot = WaveActiveBallot(1);
          _participant_bit[(temp + 1)] = ballot.x;
          _participant_bit[(temp + 2)] = ballot.y;
        }
      }
      if (((WaveGetLaneIndex() & 1) == 1)) {
        result = (result + WaveActiveMin(result));
        uint temp = 0;
        InterlockedAdd(_wave_op_index[0], 3, temp);
        _participant_bit[temp] = (((88 << 6) | (counter0 << 4)) | (counter1 << 2));
        uint4 ballot = WaveActiveBallot(1);
        _participant_bit[(temp + 1)] = ballot.x;
        _participant_bit[(temp + 2)] = ballot.y;
      }
    }
    if (((WaveGetLaneIndex() & 1) == 0)) {
      result = (result + WaveActiveMax(3));
      uint temp = 0;
      InterlockedAdd(_wave_op_index[0], 3, temp);
      _participant_bit[temp] = ((97 << 6) | (counter0 << 4));
      uint4 ballot = WaveActiveBallot(1);
      _participant_bit[(temp + 1)] = ballot.x;
      _participant_bit[(temp + 2)] = ballot.y;
    }
  }
  switch ((WaveGetLaneIndex() % 2)) {
  case 0: {
    // Even lanes only; every odd-lane test inside fails, so ops 119 and 128
    // produce no records.
    if (((WaveGetLaneIndex() & 1) == 0)) {
      if (((WaveGetLaneIndex() & 1) == 0)) {
        if (((WaveGetLaneIndex() & 1) == 1)) {
          result = (result + WaveActiveSum(result));
          uint temp = 0;
          InterlockedAdd(_wave_op_index[0], 3, temp);
          _participant_bit[temp] = (119 << 6);
          uint4 ballot = WaveActiveBallot(1);
          _participant_bit[(temp + 1)] = ballot.x;
          _participant_bit[(temp + 2)] = ballot.y;
        }
      }
      if (((WaveGetLaneIndex() & 1) == 1)) {
        result = (result + WaveActiveMax(7));
        uint temp = 0;
        InterlockedAdd(_wave_op_index[0], 3, temp);
        _participant_bit[temp] = (128 << 6);
        uint4 ballot = WaveActiveBallot(1);
        _participant_bit[(temp + 1)] = ballot.x;
        _participant_bit[(temp + 2)] = ballot.y;
      }
    }
    break;
  }
  case 1: {
    // Odd lanes only, so the even-lane test fails and op 137 produces no
    // records.
    if (((WaveGetLaneIndex() % 2) == 0)) {
      result = (result + WaveActiveSum(2));
      uint temp = 0;
      InterlockedAdd(_wave_op_index[0], 3, temp);
      _participant_bit[temp] = (137 << 6);
      uint4 ballot = WaveActiveBallot(1);
      _participant_bit[(temp + 1)] = ballot.x;
      _participant_bit[(temp + 2)] = ballot.y;
    }
    break;
  }
  }
  // Else-if ladder: lane 1 takes op 198, lanes 2 and 3 take op 194, and lane 0
  // falls through to op 176. Ops 188 and 182 are unreachable for lane 0.
  if ((WaveGetLaneIndex() == 1)) {
    result = (result + WaveActiveSum(1));
    uint temp = 0;
    InterlockedAdd(_wave_op_index[0], 3, temp);
    _participant_bit[temp] = (198 << 6);
    uint4 ballot = WaveActiveBallot(1);
    _participant_bit[(temp + 1)] = ballot.x;
    _participant_bit[(temp + 2)] = ballot.y;
  } else {
    if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) {
      result = (result + WaveActiveMin((WaveGetLaneIndex() + 2)));
      uint temp = 0;
      InterlockedAdd(_wave_op_index[0], 3, temp);
      _participant_bit[temp] = (194 << 6);
      uint4 ballot = WaveActiveBallot(1);
      _participant_bit[(temp + 1)] = ballot.x;
      _participant_bit[(temp + 2)] = ballot.y;
    } else {
      if ((WaveGetLaneIndex() == 1)) {
        result = (result + WaveActiveMax((WaveGetLaneIndex() + 3)));
        uint temp = 0;
        InterlockedAdd(_wave_op_index[0], 3, temp);
        _participant_bit[temp] = (188 << 6);
        uint4 ballot = WaveActiveBallot(1);
        _participant_bit[(temp + 1)] = ballot.x;
        _participant_bit[(temp + 2)] = ballot.y;
      } else {
        if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
          result = (result + WaveActiveSum((WaveGetLaneIndex() + 4)));
          uint temp = 0;
          InterlockedAdd(_wave_op_index[0], 3, temp);
          _participant_bit[temp] = (182 << 6);
          uint4 ballot = WaveActiveBallot(1);
          _participant_bit[(temp + 1)] = ballot.x;
          _participant_bit[(temp + 2)] = ballot.y;
        } else {
          if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) {
            result = (result + WaveActiveMin(5));
            uint temp = 0;
            InterlockedAdd(_wave_op_index[0], 3, temp);
            _participant_bit[temp] = (176 << 6);
            uint4 ballot = WaveActiveBallot(1);
            _participant_bit[(temp + 1)] = ballot.x;
            _participant_bit[(temp + 2)] = ballot.y;
          }
        }
      }
    }
  }
}
+ Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267596643952976_692_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267596643952976_692_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ce7e6f75 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267596643952976_692_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,504 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if 
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if 
(((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((226 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 
3: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((260 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((275 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((290 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((297 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((306 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (334 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (354 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (367 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (378 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (396 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (406 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (415 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (420 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (427 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (448 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (459 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (470 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (479 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (484 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1936, 1, 0, 5696, 2, 0, 14464, 4, 0, 14480, 4, 0, 18564, 8, 0, 18568, 8, 0, 18580, 8, 0, 18584, 8, 0, 18596, 8, 0, 18600, 8, 0, 19012, 8, 0, 19016, 8, 0, 
19028, 8, 0, 19032, 8, 0, 19044, 8, 0, 19048, 8, 0, 21376, 8, 0, 30976, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267596917440892_693_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267596917440892_693_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a9bdb833 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267596917440892_693_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,139 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + 
- Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4928, 5, 0, 4928, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267596968813041_694_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267596968813041_694_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..16f79b67 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267596968813041_694_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,113 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 2000, 2, 0, 3136, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267597022231452_695_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267597022231452_695_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..52a719fb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267597022231452_695_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,416 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + 
counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((226 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((235 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((267 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((307 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((321 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((332 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((339 << 
6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 2: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((353 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((372 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((382 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((391 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((396 << 6) | (counter5 << 4)) | (i6 << 2)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((403 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (410 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 1296, 1, 0, 1312, 1, 0, 19664, 2, 0, 19680, 2, 0, 21264, 2, 0, 21280, 2, 0, 25360, 4, 0, 25364, 4, 0, 25368, 4, 0, 25376, 4, 0, 25380, 4, 0, 25384, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267597187812908_696_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267597187812908_696_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a9b6fba3 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267597187812908_696_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,311 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: 
{ + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((201 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (266 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 3520, 4, 0, 3968, 8, 0, 6032, 2, 0, 6048, 2, 0, 8976, 2, 0, 8992, 2, 0, 10368, 12, 0, 10368, 12, 0, 11984, 1, 0, 12884, 1, 0, 12888, 1, 0, 14912, 4, 0, 16064, 8, 0, 17024, 12, 0, 17024, 12, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267597315465407_698_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267597315465407_698_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c8e92f8c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267597315465407_698_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,201 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((28 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((84 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((179 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + 
Size: 291 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1812, 13, 0, 1812, 13, 0, 1812, 13, 0, 1816, 13, 0, 1816, 13, 0, 1816, 13, 0, 1820, 13, 0, 1820, 13, 0, 1820, 13, 0, 1828, 13, 0, 1828, 13, 0, 1828, 13, 0, 1832, 13, 0, 1832, 13, 0, 1832, 13, 0, 1836, 13, 0, 1836, 13, 0, 1836, 13, 0, 1844, 13, 0, 1844, 13, 0, 1844, 13, 0, 1848, 13, 0, 1848, 13, 0, 1848, 13, 0, 1852, 13, 0, 1852, 13, 0, 1852, 13, 0, 3476, 10, 0, 3476, 10, 0, 3480, 10, 0, 3480, 10, 0, 3484, 10, 0, 3484, 10, 0, 3492, 10, 0, 3492, 10, 0, 3496, 10, 0, 3496, 10, 0, 3500, 10, 0, 3500, 10, 0, 3508, 10, 0, 3508, 10, 0, 3512, 10, 0, 3512, 10, 0, 3516, 10, 0, 3516, 10, 0, 4436, 10, 0, 4436, 10, 0, 4440, 10, 0, 4440, 10, 0, 4444, 10, 0, 4444, 10, 0, 4452, 10, 0, 4452, 10, 0, 4456, 10, 0, 4456, 10, 0, 4460, 10, 0, 4460, 10, 0, 4468, 10, 0, 4468, 10, 0, 4472, 10, 0, 4472, 10, 0, 4476, 10, 0, 4476, 10, 0, 5396, 5, 0, 5396, 5, 0, 5400, 5, 0, 5400, 5, 0, 5404, 5, 0, 5404, 5, 0, 5412, 5, 0, 5412, 5, 0, 5416, 5, 0, 5416, 5, 0, 5420, 5, 0, 5420, 5, 0, 5428, 5, 0, 5428, 5, 0, 5432, 5, 0, 5432, 5, 0, 5436, 5, 0, 5436, 5, 0, 6352, 10, 0, 6352, 10, 0, 6368, 10, 0, 6368, 10, 0, 6384, 10, 0, 6384, 10, 0, 6976, 1, 0, 8144, 2, 0, 8160, 2, 0, 8448, 4, 0, 8896, 8, 0, 9920, 10, 0, 9920, 10, 0, 12160, 8, 0, 12176, 8, 0, 12192, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267597641053930_699_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267597641053930_699_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..94213c56 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267597641053930_699_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,450 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((260 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((298 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (307 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((327 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (((342 << 6) | (i6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 1)) { + continue; + } + } + } + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (371 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (376 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (386 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (415 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (426 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (435 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 11904, 9, 0, 11904, 9, 0, 19648, 4, 0, 23744, 1, 0, 24064, 7, 0, 24064, 7, 0, 24064, 7, 0, 24704, 9, 0, 24704, 9, 0, 26560, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267597852198675_701_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267597852198675_701_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..240f6f76 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267597852198675_701_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,392 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (counter1 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((191 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 1, 0, 1728, 1, 0, 6160, 10, 0, 6160, 10, 0, 6176, 10, 0, 6176, 10, 0, 9680, 10, 0, 9680, 10, 0, 9696, 10, 0, 9696, 10, 0, 10112, 8, 0, 15104, 8, 0, 15120, 8, 0, 16256, 9, 0, 16256, 9, 0, 16272, 9, 0, 16272, 9, 0, 16832, 1, 0, 16848, 
1, 0, 17728, 4, 0, 20224, 9, 0, 20224, 9, 0, 19968, 2, 0, 19584, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267597964559491_702_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267597964559491_702_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d32aa8ff --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267597964559491_702_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,114 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 2)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267598813479354_708_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267598813479354_708_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cdc30467 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267598813479354_708_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,383 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((75 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = 
(result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || 
(WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((262 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((269 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((315 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((333 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 1)) { + break; + } + } + if ((i4 == 1)) { + continue; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (343 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 2, 0, 4112, 2, 0, 4116, 2, 0, 4120, 2, 0, 4128, 2, 0, 4132, 2, 0, 4136, 2, 0, 5248, 2, 0, 5888, 1, 0, 7424, 2, 0, 8960, 2, 0, 9792, 2, 0, 13312, 4, 0, 15888, 8, 0, 15904, 8, 0, 15920, 8, 0, 16784, 8, 0, 16800, 8, 0, 16816, 8, 0, 18368, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 
+ Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267599042231163_710_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267599042231163_710_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4d91adb9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267599042231163_710_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,113 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((28 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1796, 9, 0, 1796, 9, 0, 1800, 9, 0, 1800, 9, 0, 1812, 9, 0, 1812, 9, 0, 1816, 9, 0, 1816, 9, 0, 2496, 1, 0, 2512, 1, 0, 3584, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267599106653071_711_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267599106653071_711_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ddc3746a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267599106653071_711_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,551 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + 
while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((133 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 0)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (266 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (290 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(8)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (304 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((318 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((333 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + if ((i4 == 1)) { + break; + } + } + if ((counter3 == 1)) { + break; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (352 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((380 << 6) | (counter5 << 4)) | (counter6 << 2)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter6 == 2)) { + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (392 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (399 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((414 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((440 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((455 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((466 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((477 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((486 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (498 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (520 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (514 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5184, 1, 0, 7248, 1, 0, 8532, 1, 0, 8536, 1, 0, 8540, 1, 0, 9364, 1, 0, 9368, 1, 0, 9372, 1, 0, 10064, 1, 0, 16384, 8, 0, 22528, 5, 0, 22528, 5, 0, 25536, 8, 0, 26496, 8, 0, 26512, 8, 0, 29120, 2, 0, 29136, 2, 0, 29824, 8, 0, 29840, 8, 0, 31104, 2, 0, 31120, 2, 0, 33280, 6, 0, 33280, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267599327153045_712_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267599327153045_712_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..638bebee --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267599327153045_712_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,378 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | 
(counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 
= (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((179 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((280 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if 
(((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((298 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((309 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((318 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 87 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 5248, 4, 0, 7936, 4, 0, 11476, 1, 0, 11480, 1, 0, 11484, 1, 0, 11492, 1, 0, 11496, 1, 0, 11500, 1, 0, 12480, 1, 0, 12928, 8, 0, 13824, 8, 0, 14528, 8, 0, 16720, 4, 0, 16736, 4, 0, 16752, 4, 0, 17936, 4, 0, 17952, 4, 0, 17968, 4, 0, 19796, 4, 0, 19800, 4, 0, 19812, 4, 0, 19816, 4, 0, 19828, 4, 0, 19832, 4, 0, 20368, 4, 0, 20384, 4, 0, 20400, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267599464658835_713_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267599464658835_713_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..48292313 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267599464658835_713_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,544 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 3)) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((27 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i2 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((145 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((155 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((169 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + break; + } + case 3: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (counter5 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((235 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((282 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((291 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((296 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((303 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((307 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((318 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 
2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((327 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (334 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (344 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (372 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((391 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 
2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (406 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (421 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (431 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (446 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((464 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 3)) { + counter8 = (counter8 + 1); + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((480 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((489 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((500 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7296, 1, 0, 10820, 4, 0, 10824, 4, 0, 10836, 4, 0, 10840, 4, 0, 10852, 4, 0, 10856, 4, 0, 11728, 8, 0, 11744, 8, 0, 17424, 8, 0, 17440, 8, 0, 19408, 8, 0, 19424, 8, 0, 20368, 8, 0, 20384, 8, 0, 22016, 9, 0, 22016, 9, 0, 23808, 2, 0, 25024, 2, 0, 25040, 2, 0, 25056, 2, 0, 25984, 2, 0, 30740, 4, 0, 30744, 4, 0, 30748, 4, 0, 30756, 4, 0, 30760, 4, 0, 30764, 4, 0, 30772, 4, 0, 30776, 4, 0, 30780, 4, 0, 31316, 4, 0, 31320, 4, 0, 31324, 4, 0, 31332, 4, 0, 31336, 4, 0, 31340, 4, 0, 31348, 4, 0, 31352, 4, 0, 31356, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267599736784141_714_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267599736784141_714_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d68e597e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267599736784141_714_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,575 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 0)) { + if 
((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((203 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((221 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if 
(((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((232 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((243 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((279 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((293 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((302 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter5 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (340 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((356 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (367 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (378 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((395 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((404 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (413 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) 
{ + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (418 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter8 = 0; + while ((counter8 < 3)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((438 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter9 = 0; + while ((counter9 < 3)) { + counter9 = (counter9 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((452 << 6) | (counter8 << 4)) | (counter9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((465 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (475 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter10 = 0; + while ((counter10 < 3)) { + counter10 
= (counter10 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((493 << 6) | (counter10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i11 = 0; (i11 < 2); i11 = (i11 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((508 << 6) | (counter10 << 4)) | (i11 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter10 == 2)) { + break; + } + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (518 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter12 = 0; + while ((counter12 < 3)) { + counter12 = (counter12 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((538 << 6) | (counter12 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i13 = 0; (i13 < 2); i13 = (i13 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((561 << 6) | (counter12 << 4)) | (i13 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i13 == 1)) { + break; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 147 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 12996, 4, 0, 13000, 4, 0, 13012, 4, 0, 13016, 4, 0, 13028, 4, 0, 13032, 4, 0, 20608, 1, 0, 24192, 2, 0, 25856, 2, 0, 25872, 2, 0, 26752, 4, 0, 28048, 8, 0, 28064, 8, 0, 28080, 8, 0, 28948, 8, 0, 28952, 8, 0, 28956, 8, 0, 28964, 8, 0, 28968, 8, 0, 28972, 8, 0, 28980, 8, 0, 28984, 8, 0, 28988, 8, 0, 29776, 8, 0, 29792, 8, 0, 29808, 8, 0, 30400, 1, 0, 31568, 1, 0, 31584, 1, 0, 33152, 2, 0, 34448, 6, 0, 34448, 6, 0, 34464, 6, 0, 34464, 6, 0, 34480, 6, 0, 34480, 6, 0, 35920, 12, 0, 35920, 12, 0, 35924, 12, 0, 35924, 12, 0, 35936, 12, 0, 35936, 12, 0, 35940, 12, 0, 35940, 12, 0, 35952, 12, 0, 35952, 12, 0, 35956, 12, 0, 35956, 12, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267600140130772_715_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267600140130772_715_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d557b261 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267600140130772_715_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,135 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1728, 8, 0, 3904, 8, 0, 5120, 2, 0, 5136, 2, 0, 5152, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267600199607279_716_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267600199607279_716_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..abc8e141 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267600199607279_716_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- 
+Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267600259222103_717_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267600259222103_717_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b17547e4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267600259222103_717_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,264 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() 
== 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((39 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 
0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((163 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single 
dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1536, 2, 0, 1552, 2, 0, 3456, 4, 0, 3904, 8, 0, 8512, 1, 0, 11776, 1, 0, 12672, 4, 0, 13120, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267600480249960_719_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267600480249960_719_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5f4f613e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267600480249960_719_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,225 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + 
result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1664, 4, 0, 1280, 8, 0, 1024, 1, 0, 2688, 1, 0, 9216, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267600648839210_721_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267600648839210_721_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a16f84f9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267600648839210_721_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,232 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 3)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((34 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2196, 8, 0, 2200, 8, 0, 2212, 8, 0, 2216, 8, 0, 2228, 8, 0, 2232, 8, 0, 3264, 8, 0, 4224, 5, 0, 4224, 5, 0, 6592, 3, 0, 6592, 3, 0, 7232, 9, 0, 7232, 9, 0, 8128, 4, 0, 10048, 12, 0, 10048, 12, 0, 11008, 4, 0, 11024, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267600754447399_722_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267600754447399_722_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e0c428ce --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267600754447399_722_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,338 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 99 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 4, 0, 2688, 8, 0, 2704, 8, 0, 3844, 8, 0, 3848, 8, 0, 3852, 8, 0, 3860, 8, 0, 3864, 8, 0, 3868, 8, 0, 4416, 8, 0, 4432, 8, 0, 5456, 8, 0, 5472, 8, 0, 6736, 13, 0, 6736, 13, 0, 6736, 13, 0, 6752, 13, 0, 6752, 13, 0, 6752, 13, 0, 7376, 1, 0, 7392, 1, 0, 8272, 4, 0, 8288, 4, 0, 9872, 9, 0, 9872, 9, 0, 9888, 9, 0, 9888, 9, 0, 11008, 9, 0, 11008, 9, 0, 11648, 9, 0, 11648, 9, 0, 14400, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267601131482056_725_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267601131482056_725_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f4a95b46 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267601131482056_725_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,161 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1984, 1, 0, 2000, 1, 0, 3136, 1, 0, 3152, 1, 0, 4032, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267601183946076_726_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267601183946076_726_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f5a60b8e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267601183946076_726_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,211 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((160 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 7808, 6, 0, 7808, 6, 0, 9024, 8, 0, 9040, 8, 0, 9056, 8, 0, 10240, 8, 0, 10244, 8, 0, 10256, 8, 0, 10260, 8, 0, 10272, 8, 0, 10276, 8, 0, 11136, 8, 0, 11152, 8, 0, 11168, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267601246583708_727_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267601246583708_727_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..804cd9bf --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267601246583708_727_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,257 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + 
Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 189 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 10, 0, 976, 10, 0, 992, 10, 0, 992, 10, 0, 1008, 10, 0, 1008, 10, 0, 2320, 9, 0, 2320, 9, 0, 2324, 9, 0, 2324, 9, 0, 2328, 9, 0, 2328, 9, 0, 2336, 9, 0, 2336, 9, 0, 2340, 9, 0, 2340, 9, 0, 2344, 9, 0, 2344, 9, 0, 2352, 9, 0, 2352, 9, 0, 2356, 9, 0, 2356, 9, 0, 2360, 9, 0, 2360, 9, 0, 3024, 13, 0, 3024, 13, 0, 3024, 13, 0, 3028, 13, 0, 3028, 13, 0, 3028, 13, 0, 3032, 13, 0, 3032, 13, 0, 3032, 13, 0, 3040, 13, 0, 3040, 13, 0, 3040, 13, 0, 3044, 13, 0, 3044, 13, 0, 3044, 13, 0, 3048, 13, 0, 3048, 13, 0, 3048, 13, 0, 3056, 13, 0, 3056, 13, 0, 3056, 13, 0, 3060, 13, 0, 3060, 13, 0, 3060, 13, 0, 3064, 13, 0, 3064, 13, 0, 3064, 13, 0, 4368, 9, 0, 4368, 9, 0, 4384, 9, 0, 4384, 9, 0, 6032, 1, 0, 6048, 1, 0, 6912, 4, 0, 7808, 1, 0, 8704, 4, 0, 12816, 8, 0, 12832, 8, 0, 12848, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267601499412846_729_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267601499412846_729_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..25b9d193 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267601499412846_729_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,682 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((127 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((332 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((347 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (354 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (369 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (390 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i8 = 0; (i8 < 3); i8 = (i8 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((409 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((418 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i8 == 2)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (434 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter9 = 0; + while ((counter9 < 2)) { + counter9 = (counter9 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((456 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (466 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (476 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (485 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (490 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (494 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (501 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (512 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter10 = 0; + while ((counter10 < 2)) { + counter10 = (counter10 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((526 << 6) | (counter10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (543 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (552 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (557 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (564 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (568 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + uint counter11 = 0; + while ((counter11 < 3)) { + counter11 = (counter11 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((590 << 6) | (counter11 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (597 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (601 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (618 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads 
+Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2580, 1, 0, 2584, 1, 0, 2596, 1, 0, 2600, 1, 0, 2612, 1, 0, 2616, 1, 0, 9856, 8, 0, 14016, 1, 0, 23616, 12, 0, 23616, 12, 0, 27776, 8, 0, 32768, 2, 0, 33680, 2, 0, 33696, 2, 0, 35648, 4, 0, 39552, 14, 0, 39552, 14, 0, 39552, 14, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267601731850553_730_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267601731850553_730_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..01797e01 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267601731850553_730_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,265 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Entry point; tid is not read. Every wave reduction below is followed by one 3-word record in _participant_bit: a tag (a constant shifted left by 6, OR'ed with shifted loop counters inside loops), then ballot.x and ballot.y of WaveActiveBallot(1). The record's base index is the value InterlockedAdd(_wave_op_index[0], 3, temp) stores in temp. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
} /* no break here: lanes that entered case 0 continue into case 1 */ + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { /* an index with % 4 == 0 is even, so this odd-index test never passes in case 0 */ + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } /* no break here: case 1 falls through into case 2 */ + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); /* incremented before the body, so the tags below are built from the already-bumped value */ + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((156 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((165 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } /* no break here: case 1 falls through into case 2 */ + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((170 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((177 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((181 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((196 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 1)) { /* counter1 is 1 at the end of the first pass, so the while loop is left after a single pass */ + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1152, 1, 0, 1472, 4, 0, 7424, 2, 0, 8848, 8, 0, 11344, 8, 0, 11348, 8, 0, 12560, 8, 0, 12564, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267601914332878_732_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267601914332878_732_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5f1080e9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267601914332878_732_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,201 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Entry point; tid is not read. Every wave reduction below is followed by one 3-word record in _participant_bit: a tag (a constant shifted left by 6, OR'ed with i0 << 4 inside the for loop), then ballot.x and ballot.y of WaveActiveBallot(1). The record's base index is the value InterlockedAdd(_wave_op_index[0], 3, temp) stores in temp. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { /* never taken in case 0: an index with % 3 == 0 is either 0 or at least 3, so the test above always passes */ + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) 
|| (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { /* cannot pass inside the == 1 test that encloses it */ + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 9, 0, 1280, 9, 0, 6976, 4, 0, 7616, 5, 0, 7616, 5, 0, 9280, 10, 0, 9280, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267602068106052_734_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267602068106052_734_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1d9fe963 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267602068106052_734_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,110 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Entry point; tid is not read. Every wave reduction below is followed by one 3-word record in _participant_bit: a tag (a constant shifted left by 6, OR'ed with i0 << 4 and, in the inner loop, counter1 << 2), then ballot.x and ballot.y of WaveActiveBallot(1). The record's base index is the value InterlockedAdd(_wave_op_index[0], 3, temp) stores in temp. */ + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); /* incremented before the body, so the tags below use counter1 = 1, 2, 3 */ + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((39 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 4, 0, 912, 4, 0, 2500, 1, 0, 2504, 1, 0, 2508, 1, 0, 2516, 1, 0, 2520, 1, 0, 2524, 1, 0, 3204, 5, 0, 3204, 5, 0, 3208, 5, 0, 3208, 5, 0, 3212, 5, 0, 3212, 5, 0, 3220, 5, 0, 3220, 5, 0, 3224, 5, 0, 3224, 5, 0, 3228, 5, 0, 3228, 5, 0, 4036, 13, 0, 4036, 13, 0, 4036, 13, 0, 4040, 13, 0, 4040, 13, 0, 4040, 13, 0, 4044, 13, 0, 4044, 13, 0, 4044, 13, 0, 4052, 13, 0, 4052, 13, 0, 4052, 13, 0, 4056, 13, 0, 4056, 13, 0, 4056, 13, 0, 4060, 13, 0, 4060, 13, 0, 4060, 13, 0, 4608, 2, 0, 4624, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267602417156137_737_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267602417156137_737_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e1737d60 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267602417156137_737_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,131 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Entry point; tid is not read. Every wave reduction below is followed by one 3-word record in _participant_bit: a tag (a constant shifted left by 6), then ballot.x and ballot.y of WaveActiveBallot(1). The record's base index is the value InterlockedAdd(_wave_op_index[0], 3, temp) stores in temp. */ + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 8, 0, 3200, 8, 0, 3904, 9, 0, 3904, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267602466919494_738_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267602466919494_738_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..31c90af2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267602466919494_738_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Entry point; tid is not read. Every wave reduction below is followed by one 3-word record in _participant_bit: a tag (a constant shifted left by 6), then ballot.x and ballot.y of WaveActiveBallot(1). The record's base index is the value InterlockedAdd(_wave_op_index[0], 3, temp) stores in temp. */ + uint result = 0; + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 3)) { /* cannot pass: this branch is only reached when the index is below 3 */ + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + 
Stride: 4 + Data: [1536, 1, 0, 1280, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267602517138053_739_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267602517138053_739_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..51191426 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267602517138053_739_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,139 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Entry point; tid is not read. Every wave reduction below is followed by one 3-word record in _participant_bit: a tag (a constant shifted left by 6, OR'ed with i0 << 4 inside the for loop), then ballot.x and ballot.y of WaveActiveBallot(1). The record's base index is the value InterlockedAdd(_wave_op_index[0], 3, temp) stores in temp. */ + uint result = 0; + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { /* not satisfiable under the enclosing < 1 test */ + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { /* contradicts the enclosing < 2 test, so never passes */ + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 6400, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267602571736183_740_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267602571736183_740_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6511c455 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267602571736183_740_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,143 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 63 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 2644, 2, 0, 2648, 2, 0, 2652, 2, 0, 2660, 2, 0, 2664, 2, 0, 2668, 2, 0, 2676, 2, 0, 2680, 2, 0, 2684, 2, 0, 3540, 2, 0, 3544, 2, 0, 3548, 2, 0, 3556, 2, 0, 3560, 2, 0, 3564, 2, 0, 3572, 2, 0, 3576, 2, 0, 3580, 2, 0, 4544, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 
4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267602656635722_741_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267602656635722_741_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..943d73f8 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267602656635722_741_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() 
& 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2560, 6, 0, 2560, 6, 0, 2304, 9, 0, 2304, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267602785254748_743_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267602785254748_743_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4d7fbd35 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267602785254748_743_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,271 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (i1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2048, 6, 0, 2048, 6, 0, 1792, 9, 0, 1792, 9, 0, 2880, 8, 0, 5056, 2, 0, 6800, 4, 0, 6816, 4, 0, 8912, 4, 0, 8928, 4, 0, 10112, 8, 0, 10128, 8, 0, 11456, 1, 0, 11472, 1, 0, 12352, 4, 0, 12368, 
4, 0, 13312, 6, 0, 13312, 6, 0, 13328, 6, 0, 13328, 6, 0, 13760, 3, 0, 13760, 3, 0, 13776, 3, 0, 13776, 3, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267602894095250_744_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267602894095250_744_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ba0d3ad2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267602894095250_744_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,249 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + continue; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((178 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((188 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((214 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((221 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((234 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((243 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 147 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1472, 10, 0, 1472, 10, 0, 3328, 10, 0, 3328, 10, 0, 4800, 1, 0, 4816, 1, 0, 5760, 5, 0, 5760, 5, 0, 5776, 5, 0, 5776, 5, 0, 6464, 5, 0, 6464, 5, 0, 8256, 4, 0, 8272, 4, 0, 8288, 4, 0, 10064, 5, 0, 10064, 5, 0, 10080, 5, 0, 10080, 5, 0, 11408, 6, 0, 11408, 6, 0, 11412, 6, 0, 11412, 6, 0, 11424, 6, 0, 11424, 6, 0, 11428, 6, 0, 11428, 6, 0, 12048, 1, 0, 12052, 1, 0, 12064, 1, 0, 12068, 1, 0, 13712, 2, 0, 13716, 2, 0, 13728, 2, 0, 13732, 2, 0, 14992, 5, 0, 14992, 5, 0, 14996, 5, 0, 14996, 5, 0, 15008, 5, 0, 15008, 5, 0, 15012, 5, 0, 15012, 5, 0, 15568, 10, 0, 15568, 10, 0, 15584, 10, 0, 15584, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267603166778224_745_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267603166778224_745_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0ed014cb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267603166778224_745_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,270 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = 
(result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((175 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { 
+ if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((201 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((208 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((217 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single 
dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 135 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3584, 4, 0, 4736, 5, 0, 4736, 5, 0, 7760, 8, 0, 7776, 8, 0, 10192, 4, 0, 10208, 4, 0, 11220, 5, 0, 11220, 5, 0, 11224, 5, 0, 11224, 5, 0, 11228, 5, 0, 11228, 5, 0, 11236, 5, 0, 11236, 5, 0, 11240, 5, 0, 11240, 5, 0, 11244, 5, 0, 11244, 5, 0, 12884, 1, 0, 12888, 1, 0, 12892, 1, 0, 12900, 1, 0, 12904, 1, 0, 12908, 1, 0, 13908, 5, 0, 13908, 5, 0, 13912, 5, 0, 13912, 5, 0, 13916, 5, 0, 13916, 5, 0, 13924, 5, 0, 13924, 5, 0, 13928, 5, 0, 13928, 5, 0, 13932, 5, 0, 13932, 5, 0, 14608, 5, 0, 14608, 5, 0, 14624, 5, 0, 14624, 5, 0, 15312, 9, 0, 15312, 9, 0, 15328, 9, 0, 15328, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267603337090283_746_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267603337090283_746_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3f6bbffb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267603337090283_746_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,248 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-uint records: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0]: next free slot, bumped by 3 per record */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (i1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 1, 0, 2368, 1, 0, 7168, 2, 0, 7184, 2, 0, 7200, 2, 0, 11008, 8, 0, 11024, 8, 0, 13312, 8, 0, 13328, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267603401069276_747_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267603401069276_747_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a83fcb9b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267603401069276_747_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-uint records: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0]: next free slot, bumped by 3 per record */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + 
Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267603444857376_748_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267603444857376_748_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2d87e179 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267603444857376_748_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,263 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-uint records: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0]: next free slot, bumped by 3 per record */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) 
{ + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((120 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((131 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 
1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 10, 0, 976, 10, 0, 992, 10, 0, 992, 10, 0, 2128, 9, 0, 2128, 9, 0, 2144, 9, 0, 2144, 9, 0, 2768, 1, 0, 2784, 1, 0, 4112, 8, 0, 4128, 8, 0, 4816, 9, 0, 4816, 9, 0, 4832, 9, 0, 4832, 9, 0, 5392, 5, 0, 5392, 5, 0, 5408, 5, 0, 5408, 5, 0, 6480, 1, 0, 6496, 1, 0, 7696, 1, 0, 7700, 1, 0, 7712, 1, 0, 7716, 1, 0, 8400, 1, 0, 8404, 1, 0, 8416, 1, 0, 8420, 1, 0, 11968, 4, 0, 13504, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267603547198191_749_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267603547198191_749_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8e7e4d81 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267603547198191_749_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,137 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-uint records: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0]: next free slot, bumped by 3 per record */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 105 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 1, 0, 1312, 1, 0, 1328, 1, 0, 2388, 5, 0, 2388, 5, 0, 2392, 5, 0, 2392, 5, 0, 2404, 5, 0, 2404, 5, 0, 2408, 5, 0, 2408, 5, 0, 2420, 5, 0, 2420, 5, 0, 2424, 5, 0, 2424, 5, 0, 3668, 5, 0, 3668, 5, 0, 3672, 5, 0, 3672, 5, 0, 3684, 5, 0, 3684, 5, 0, 3688, 5, 0, 3688, 5, 0, 3700, 5, 0, 3700, 5, 0, 3704, 5, 0, 3704, 5, 0, 4560, 5, 0, 4560, 5, 0, 4576, 5, 0, 4576, 5, 0, 4592, 5, 0, 4592, 5, 0, 5120, 5, 0, 5120, 5, 0] + - Name: _wave_op_index + Format: UInt32 + 
Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267603728765833_751_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267603728765833_751_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ff9a9df6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267603728765833_751_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,324 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-uint records: tag, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0]: next free slot, bumped by 3 per record */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((134 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = 
(result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((145 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((154 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((161 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 1)) { + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 5888, 2, 0, 10944, 5, 0, 10944, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267603830416046_752_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267603830416046_752_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e1b64580 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267603830416046_752_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,415 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((136 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((155 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((160 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((167 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((171 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (counter1 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((296 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((307 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (326 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((347 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (365 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (369 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2816, 1, 0, 2832, 1, 0, 3968, 1, 0, 3984, 1, 0, 4672, 1, 0, 4688, 1, 0, 5632, 1, 0, 5648, 1, 0, 6080, 1, 0, 6096, 1, 0, 10260, 4, 0, 10264, 4, 0, 10276, 4, 0, 10280, 4, 0, 12800, 1, 0, 13376, 1, 0, 14400, 5, 0, 14400, 5, 0, 15040, 1, 0, 15936, 4, 0, 16384, 4, 0, 20224, 2, 0, 23360, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267604073475588_753_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267604073475588_753_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4ed35dcd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267604073475588_753_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,185 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + 
case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1984, 1, 0, 6336, 6, 0, 6336, 6, 0, 8912, 8, 0, 8928, 8, 0, 8944, 8, 0, 9616, 8, 0, 9632, 8, 0, 9648, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267604136554898_754_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267604136554898_754_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1a7729cb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267604136554898_754_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,163 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 4864, 4, 0, 5312, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267604192508769_755_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267604192508769_755_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fa7e9b84 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267604192508769_755_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,99 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 2704, 4, 0, 2720, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267604253051358_756_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267604253051358_756_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..74662d58 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267604253051358_756_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,606 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (315 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (324 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (341 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (346 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((362 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((376 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (386 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (405 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((426 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((444 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((457 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((472 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((487 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((500 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (508 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((530 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((547 << 6) | (i6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((556 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i8 = 0; (i8 < 3); i8 = (i8 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((571 << 6) | (i6 << 4)) | (i8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((582 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 135 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 6, 0, 1472, 6, 0, 2368, 4, 0, 3456, 4, 0, 3472, 4, 0, 6144, 10, 0, 6144, 10, 0, 7808, 8, 0, 7824, 8, 0, 15104, 1, 0, 17280, 1, 0, 18944, 2, 0, 20480, 2, 0, 21824, 2, 0, 22144, 6, 0, 22144, 6, 0, 23184, 4, 0, 23200, 4, 0, 23216, 4, 0, 24704, 1, 0, 25280, 1, 0, 25920, 1, 0, 30208, 2, 0, 30224, 2, 0, 30240, 2, 0, 31168, 2, 0, 31184, 2, 0, 31200, 2, 0, 32512, 4, 0, 35584, 8, 0, 35600, 8, 0, 35616, 8, 0, 36544, 8, 0, 36548, 8, 0, 36552, 8, 0, 36560, 8, 0, 36564, 8, 0, 36568, 8, 0, 36576, 8, 0, 36580, 8, 0, 36584, 8, 0, 37248, 8, 0, 37264, 8, 0, 37280, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267604568788098_757_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267604568788098_757_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ed525500 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267604568788098_757_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,457 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((119 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((163 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((225 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((255 << 6) | (counter4 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((266 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((296 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 3)) { + counter8 = (counter8 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((318 << 6) | (i7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (((328 << 6) | (i7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((339 << 6) | (i7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((354 << 6) | (i7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + uint counter9 = 0; + while ((counter9 < 3)) { + counter9 = (counter9 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((368 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter10 = 0; + while ((counter10 < 3)) { + counter10 = (counter10 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((389 << 6) | (counter9 << 4)) | (counter10 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((398 << 6) | (counter9 << 4)) | (counter10 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((409 << 6) | (counter9 << 4)) | (counter10 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((416 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (457 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (451 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(445 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 228 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 1664, 4, 0, 5504, 1, 0, 6464, 1, 0, 6480, 1, 0, 6496, 1, 0, 7620, 1, 0, 7624, 1, 0, 7628, 1, 0, 7636, 1, 0, 7640, 1, 0, 7644, 1, 0, 7652, 1, 0, 7656, 1, 0, 7660, 1, 0, 8320, 1, 0, 12880, 3, 0, 12880, 3, 0, 12896, 3, 0, 12896, 3, 0, 16336, 1, 0, 16340, 1, 0, 16344, 1, 0, 16352, 1, 0, 16356, 1, 0, 16360, 1, 0, 17040, 2, 0, 17056, 2, 0, 18000, 1, 0, 18016, 1, 0, 18944, 1, 0, 18960, 1, 0, 18976, 1, 0, 20356, 2, 0, 20360, 2, 0, 20364, 2, 0, 20372, 2, 0, 20376, 2, 0, 20380, 2, 0, 20388, 2, 0, 20392, 2, 0, 20396, 2, 0, 20996, 4, 0, 21000, 4, 0, 21004, 4, 0, 21012, 4, 0, 21016, 4, 0, 21020, 4, 0, 21028, 4, 0, 21032, 4, 0, 21036, 4, 0, 21700, 1, 0, 21704, 1, 0, 21708, 1, 0, 21716, 1, 0, 21720, 1, 0, 21724, 1, 0, 21732, 1, 0, 21736, 1, 0, 21740, 1, 0, 22660, 2, 0, 22664, 2, 0, 22668, 2, 0, 22676, 2, 0, 22680, 2, 0, 22684, 2, 0, 22692, 2, 0, 22696, 2, 0, 22700, 2, 0, 23568, 8, 0, 23584, 8, 0, 23600, 8, 0, 29248, 14, 0, 29248, 14, 0, 29248, 14, 0, 28864, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267605192977421_758_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267605192977421_758_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0ad28a8c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267605192977421_758_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,292 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + } + } + 
switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((116 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch 
((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((126 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((135 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((140 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((151 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 105 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 3776, 9, 0, 3776, 9, 0, 4672, 4, 0, 6288, 1, 0, 6304, 1, 0, 7444, 2, 0, 7448, 2, 0, 7460, 2, 0, 7464, 2, 0, 8084, 9, 0, 8084, 9, 0, 8088, 9, 0, 8088, 9, 0, 8100, 9, 0, 8100, 9, 0, 8104, 9, 0, 8104, 9, 0, 8660, 1, 0, 8664, 1, 0, 8676, 1, 0, 8680, 1, 0, 9684, 10, 0, 9684, 10, 0, 9688, 10, 0, 9688, 10, 0, 9700, 10, 0, 9700, 10, 0, 9704, 10, 0, 9704, 10, 0, 10256, 1, 0, 10272, 1, 0, 11200, 3, 0, 11200, 3, 0, 11840, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267605473612372_760_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267605473612372_760_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1a82cefc --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267605473612372_760_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,99 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3584, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267605529450359_761_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267605529450359_761_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..39294bcb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267605529450359_761_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,403 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((212 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((243 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((282 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((292 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((301 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((306 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((313 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((317 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((328 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((337 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (341 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1216, 9, 0, 1216, 9, 0, 2432, 4, 0, 3776, 1, 0, 14864, 4, 0, 14880, 4, 0, 15568, 4, 0, 15584, 4, 0, 
16384, 4, 0, 20048, 8, 0, 20052, 8, 0, 20064, 8, 0, 20068, 8, 0, 20080, 8, 0, 20084, 8, 0, 21008, 8, 0, 21012, 8, 0, 21024, 8, 0, 21028, 8, 0, 21040, 8, 0, 21044, 8, 0, 21584, 8, 0, 21600, 8, 0, 21616, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267605669579813_762_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267605669579813_762_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5e774f97 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267605669579813_762_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,363 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 
8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (i2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((194 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((293 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (((308 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((326 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (337 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 87 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7296, 10, 0, 7296, 10, 0, 7312, 10, 0, 7312, 10, 0, 7328, 10, 0, 7328, 10, 0, 8192, 10, 0, 8192, 10, 0, 9536, 1, 0, 12436, 4, 0, 12440, 4, 0, 12444, 4, 0, 12452, 4, 0, 12456, 4, 0, 12460, 4, 0, 12864, 8, 0, 13760, 1, 0, 14656, 4, 0, 15808, 8, 0, 18752, 8, 0, 18756, 8, 0, 18768, 8, 0, 18772, 8, 0, 19712, 8, 0, 19716, 8, 0, 19728, 8, 0, 19732, 8, 0, 20864, 8, 0, 20880, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + 
Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267605896397407_764_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267605896397407_764_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5516d284 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267605896397407_764_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,439 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((213 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((223 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((232 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((237 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((244 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((255 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if 
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((301 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((316 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (327 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((353 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((363 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((372 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((383 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 93 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 9, 0, 1280, 9, 0, 1984, 9, 0, 1984, 9, 0, 12432, 4, 0, 12448, 4, 0, 13648, 4, 0, 13652, 4, 0, 13656, 4, 0, 13664, 4, 0, 13668, 4, 0, 13672, 4, 0, 15184, 4, 0, 15188, 4, 0, 15192, 4, 0, 15200, 4, 0, 15204, 4, 0, 15208, 4, 0, 17408, 5, 0, 17408, 5, 0, 20240, 4, 0, 20256, 4, 0, 22608, 8, 0, 22612, 8, 0, 22624, 8, 0, 22628, 8, 0, 22640, 8, 0, 22644, 8, 0, 24528, 8, 0, 24544, 8, 0, 24560, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267606349961701_765_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267606349961701_765_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..98462530 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267606349961701_765_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,196 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 1)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = 
(result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3776, 1, 0, 3792, 1, 0, 3808, 1, 0, 7872, 4, 0, 8512, 4, 0, 10496, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267606414173508_766_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267606414173508_766_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3aa8085b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267606414173508_766_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,286 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((32 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((167 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 
2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 75 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2068, 1, 0, 2072, 1, 0, 2084, 1, 0, 2088, 1, 0, 2100, 1, 0, 2104, 1, 0, 3732, 1, 0, 3736, 1, 0, 3748, 1, 0, 3752, 1, 0, 3764, 1, 0, 3768, 1, 0, 8384, 2, 0, 8400, 2, 0, 8416, 2, 0, 11584, 8, 0, 12480, 5, 0, 12480, 5, 0, 13120, 8, 0, 14336, 8, 0, 14352, 8, 0, 14368, 8, 0, 14976, 9, 0, 14976, 9, 0, 15872, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267606590541859_767_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267606590541859_767_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..614460b5 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267606590541859_767_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,198 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 4, 0, 2816, 5, 0, 2816, 5, 0, 4368, 1, 0, 6864, 5, 0, 6864, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267606649795909_768_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267606649795909_768_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..800e1eff --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267606649795909_768_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,227 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2048, 9, 0, 2048, 9, 0, 1792, 4, 0, 4416, 1, 0, 5120, 1, 0, 10944, 4, 0, 11392, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267606810974500_770_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267606810974500_770_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..004d3325 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267606810974500_770_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,399 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = 
(counter0 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((249 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((294 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((301 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((331 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((352 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((363 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (368 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((382 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((392 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5504, 4, 0, 11344, 1, 0, 11360, 1, 0, 12048, 1, 0, 12064, 1, 0, 12624, 4, 0, 12640, 4, 0, 13776, 10, 0, 13776, 10, 0, 13792, 10, 0, 13792, 10, 0, 13808, 10, 0, 13808, 10, 0, 15952, 10, 0, 15952, 10, 0, 15968, 10, 0, 15968, 10, 0, 15984, 10, 0, 15984, 10, 0, 23552, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267606983047197_771_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267606983047197_771_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e7c4f68d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267606983047197_771_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,161 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(10)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1744, 10, 0, 1744, 10, 0, 1760, 10, 0, 1760, 10, 0, 3156, 8, 0, 3160, 8, 0, 3164, 8, 0, 3172, 8, 0, 3176, 8, 0, 3180, 8, 0, 4116, 2, 0, 4120, 2, 0, 4124, 2, 0, 4132, 2, 0, 4136, 2, 0, 4140, 2, 0, 4816, 8, 0, 4832, 8, 0, 6016, 6, 0, 6016, 6, 0, 6032, 6, 0, 6032, 6, 0, 7872, 4, 0, 7876, 4, 0, 7880, 4, 0, 7888, 4, 0, 7892, 4, 0, 7896, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267607348419667_772_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267607348419667_772_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..19842872 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267607348419667_772_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,228 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = 
(result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((120 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((130 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((137 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((148 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((159 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [704, 8, 0, 1344, 8, 0, 7696, 10, 0, 7696, 10, 0, 7700, 10, 0, 7700, 10, 0, 7712, 10, 0, 7712, 10, 0, 7716, 10, 0, 7716, 10, 0, 8336, 4, 0, 8340, 4, 0, 8352, 4, 0, 8356, 4, 0, 9488, 10, 0, 9488, 10, 0, 9492, 10, 0, 9492, 10, 0, 9504, 10, 0, 9504, 10, 0, 9508, 10, 0, 9508, 10, 0, 10192, 1, 0, 10196, 1, 0, 10208, 1, 0, 10212, 1, 0, 10896, 10, 0, 10896, 10, 0, 10912, 10, 0, 10912, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267607493761985_773_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267607493761985_773_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a1e34ee7 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267607493761985_773_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,167 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: 
Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 4224, 4, 0, 5504, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267607559480757_774_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267607559480757_774_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..442ced49 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267607559480757_774_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,308 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) 
{ + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + 
} + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((191 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((223 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((255 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((275 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 
threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 57 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 3648, 4, 0, 10112, 4, 0, 10560, 8, 0, 12224, 5, 0, 12224, 5, 0, 12240, 5, 0, 12240, 5, 0, 12256, 5, 0, 12256, 5, 0, 15232, 1, 0, 15248, 1, 0, 15264, 1, 0, 16320, 10, 0, 16320, 10, 0, 16336, 10, 0, 16336, 10, 0, 16352, 10, 0, 16352, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267607671117355_775_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267607671117355_775_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..87d675fb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267607671117355_775_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,400 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + break; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((302 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1472, 1, 0, 3264, 8, 0, 8768, 1, 0, 8784, 1, 0, 8800, 1, 0, 9472, 1, 0, 9488, 1, 
0, 9504, 1, 0, 10880, 10, 0, 10880, 10, 0, 11968, 8, 0, 12864, 4, 0, 13824, 4, 0, 14656, 1, 0, 16592, 4, 0, 18432, 8, 0, 19344, 8, 0, 19360, 8, 0, 19376, 8, 0, 20288, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267608024277626_778_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267608024277626_778_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..62cade5c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267608024277626_778_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1664, 8, 0, 1280, 5, 0, 1280, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267608158225391_780_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267608158225391_780_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..efc53df5 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267608158225391_780_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,322 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() 
+ 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (counter2 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3588, 4, 0, 3592, 4, 0, 3604, 4, 0, 3608, 4, 0, 3620, 4, 0, 3624, 4, 0, 4548, 4, 0, 4552, 4, 0, 4564, 4, 0, 4568, 4, 0, 4580, 4, 0, 4584, 4, 0, 5504, 4, 0, 5520, 4, 0, 5536, 4, 0, 8960, 5, 0, 8960, 5, 0, 8576, 8, 0, 7936, 2, 0, 10432, 1, 0, 13504, 4, 0, 14784, 8, 0, 15424, 8, 0, 16320, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: 
+ - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267608251585419_781_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267608251585419_781_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4af8dd00 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267608251585419_781_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,324 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else 
{ + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 51 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 9, 0, 1088, 9, 0, 4480, 9, 0, 4480, 9, 0, 5120, 8, 0, 9232, 8, 0, 
9248, 8, 0, 10368, 8, 0, 10944, 8, 0, 11584, 1, 0, 14464, 2, 0, 15424, 2, 0, 15744, 4, 0, 16448, 2, 0, 17088, 9, 0, 17088, 9, 0, 17984, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267608357415603_782_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267608357415603_782_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3de1e3fd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267608357415603_782_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267608411993862_783_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267608411993862_783_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..54fe4b5d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267608411993862_783_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,253 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1472, 9, 0, 1472, 9, 0, 2432, 1, 0, 2448, 1, 0, 2464, 1, 0, 2880, 8, 0, 2896, 8, 0, 2912, 8, 0, 11328, 6, 0, 11328, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267689066180916_786_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267689066180916_786_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2373976f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267689066180916_786_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,103 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 10, 0, 1152, 10, 0, 1168, 10, 0, 1168, 10, 0, 2752, 2, 0, 2756, 2, 0, 2768, 2, 0, 2772, 2, 0, 3200, 1, 0, 3204, 1, 0, 3216, 1, 0, 3220, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267689161868983_787_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267689161868983_787_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e0430dac --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267689161868983_787_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,165 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if ((WaveGetLaneIndex() < 2)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + for (uint i0 = 0; (i0 < 2); 
i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((93 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((104 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((115 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((126 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((137 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3264, 9, 0, 3264, 9, 0, 4352, 1, 0, 4368, 1, 0, 5956, 13, 0, 5956, 13, 0, 5956, 13, 0, 5960, 13, 0, 5960, 13, 0, 5960, 13, 0, 5972, 13, 0, 5972, 13, 0, 5972, 13, 0, 5976, 13, 0, 5976, 13, 0, 5976, 13, 0, 6660, 13, 0, 6660, 13, 0, 6660, 13, 0, 6664, 13, 0, 6664, 13, 0, 6664, 13, 0, 6676, 13, 0, 6676, 13, 0, 6676, 13, 0, 6680, 13, 0, 6680, 13, 0, 6680, 13, 0, 8068, 2, 0, 8072, 2, 0, 8084, 2, 0, 8088, 2, 0, 8772, 5, 0, 8772, 5, 0, 8776, 5, 0, 8776, 5, 0, 8788, 5, 0, 8788, 5, 0, 8792, 5, 0, 8792, 5, 0, 9216, 8, 0, 9232, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267689321279287_788_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267689321279287_788_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ba453216 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267689321279287_788_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,116 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 2)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 4, 0, 1232, 4, 0, 2496, 8, 0, 2512, 8, 0, 3072, 4, 0, 3088, 4, 0, 4752, 2, 0, 4768, 2, 0, 4784, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267689392536141_789_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267689392536141_789_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3460c72d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267689392536141_789_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,300 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((30 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((180 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((190 << 6) | 
(counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((199 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((204 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((211 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((215 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((226 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((counter5 == 2)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 207 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1936, 5, 0, 1936, 5, 0, 1940, 5, 0, 1940, 5, 0, 1952, 5, 0, 1952, 5, 0, 1956, 5, 0, 1956, 5, 0, 1968, 5, 0, 1968, 5, 0, 1972, 5, 0, 1972, 5, 0, 2640, 5, 0, 2640, 5, 0, 2644, 5, 0, 2644, 5, 0, 2656, 5, 0, 2656, 5, 0, 2660, 5, 0, 2660, 5, 0, 2672, 5, 0, 2672, 5, 0, 2676, 5, 0, 2676, 5, 0, 3088, 2, 0, 3092, 2, 0, 3104, 2, 0, 3108, 2, 0, 3120, 2, 0, 3124, 2, 0, 4496, 14, 0, 4496, 14, 0, 4496, 14, 0, 4500, 14, 0, 4500, 14, 0, 4500, 14, 0, 4512, 14, 0, 4512, 14, 0, 4512, 14, 0, 4516, 14, 0, 4516, 14, 0, 4516, 14, 0, 4528, 14, 0, 4528, 14, 0, 4528, 14, 0, 4532, 14, 0, 4532, 14, 0, 4532, 14, 0, 5776, 4, 0, 6992, 1, 0, 6996, 1, 0, 7000, 1, 0, 7824, 4, 0, 11540, 1, 0, 11544, 1, 0, 11556, 1, 0, 11560, 1, 0, 12180, 1, 0, 12184, 1, 0, 12196, 1, 0, 12200, 1, 0, 13076, 4, 0, 13080, 4, 0, 13092, 4, 0, 13096, 4, 0, 14484, 4, 0, 14488, 4, 0, 14500, 4, 0, 14504, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267689753069824_790_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267689753069824_790_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..340e8451 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267689753069824_790_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,127 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; 
(i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2816, 8, 0, 4736, 8, 0, 4740, 8, 0, 4752, 8, 0, 4756, 8, 0, 4768, 8, 0, 4772, 8, 0, 5312, 8, 0, 5328, 8, 0, 5344, 8, 0, 6272, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - 
Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267689828942235_791_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267689828942235_791_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c075069a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267689828942235_791_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,142 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2176, 1, 0, 2752, 1, 0, 3072, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index 
+ Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267690022173552_794_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267690022173552_794_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a3120c78 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267690022173552_794_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,258 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((241 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((252 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 8, 0, 2304, 8, 0, 2320, 8, 0, 2336, 8, 0, 13312, 4, 0, 17536, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267690092517305_795_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267690092517305_795_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..30b0240f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267690092517305_795_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,346 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = 
(result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((274 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (344 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (338 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (332 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (328 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1488, 2, 0, 1504, 2, 0, 2624, 4, 0, 4480, 1, 0, 5632, 1, 0, 6336, 1, 0, 12096, 4, 0, 14208, 8, 0, 14224, 8, 0, 17984, 8, 0, 18000, 8, 0, 22016, 9, 0, 22016, 9, 0, 20608, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 
+ Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267690202511155_796_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267690202511155_796_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..20c62841 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267690202511155_796_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,415 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (i1 << 2)); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((126 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 2)) { + break; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((266 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((311 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((323 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((332 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((341 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((348 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 1)) { + continue; + } + if ((i6 == 2)) { + break; + } + } + } else { + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((375 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 
= 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((391 << 6) | (i7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((402 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 123 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4032, 1, 0, 4036, 1, 0, 4040, 1, 0, 4048, 1, 0, 4052, 1, 0, 4056, 1, 0, 4864, 1, 0, 4880, 1, 0, 6848, 4, 0, 6864, 4, 0, 6880, 4, 0, 8832, 4, 0, 8848, 4, 0, 8864, 4, 0, 11008, 8, 0, 15248, 1, 0, 15264, 1, 0, 15280, 1, 0, 16592, 8, 0, 16608, 8, 0, 16624, 8, 0, 17040, 1, 0, 17056, 1, 0, 17072, 1, 0, 17920, 4, 0, 18944, 1, 0, 19904, 3, 0, 19904, 3, 0, 19920, 3, 0, 19920, 3, 0, 19936, 3, 0, 19936, 3, 0, 21824, 2, 0, 21840, 2, 0, 21856, 2, 0, 24000, 8, 0, 24016, 8, 0, 24032, 8, 0, 25728, 8, 0, 25744, 8, 0, 25760, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + 
Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267690772800248_797_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267690772800248_797_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..59b40921 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267690772800248_797_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,68 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 6, 0, 1088, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267690874224695_799_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267690874224695_799_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bf416c63 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267690874224695_799_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,194 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7424, 10, 0, 7424, 10, 0, 8336, 8, 0, 8352, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267690925563686_800_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267690925563686_800_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..eb6cf05d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267690925563686_800_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,77 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 3, 0, 896, 3, 0, 912, 3, 0, 912, 3, 0, 928, 3, 0, 928, 3, 0, 1472, 3, 0, 1472, 3, 0, 1488, 3, 0, 1488, 3, 0, 1504, 3, 0, 1504, 3, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267690975296190_801_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267690975296190_801_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..229e8438 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267690975296190_801_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,344 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + 
WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 4, 0, 2944, 8, 0, 3840, 8, 0, 5248, 8, 0, 11200, 9, 0, 11200, 9, 0, 13264, 2, 0, 14224, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267691061450085_802_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267691061450085_802_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3de1e3fd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267691061450085_802_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + 
- Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267691116497037_803_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267691116497037_803_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a62a2061 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267691116497037_803_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267691171697154_804_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267691171697154_804_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..eb280b12 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267691171697154_804_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,274 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((132 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((142 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((151 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((156 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((160 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((167 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 105 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 2752, 15, 0, 2752, 15, 0, 2752, 15, 0, 2752, 15, 0, 3392, 8, 0, 5376, 8, 0, 7488, 1, 0, 7504, 1, 0, 7520, 1, 0, 8448, 4, 0, 8452, 4, 0, 8464, 4, 0, 8468, 4, 0, 8480, 4, 0, 8484, 4, 0, 9088, 1, 0, 9092, 1, 0, 9104, 1, 0, 9108, 1, 0, 9120, 1, 0, 9124, 1, 0, 9984, 4, 0, 9988, 4, 0, 10000, 4, 0, 10004, 4, 0, 10016, 4, 0, 10020, 4, 
0, 10688, 1, 0, 10692, 1, 0, 10704, 1, 0, 10708, 1, 0, 10720, 1, 0, 10724, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267691555510379_806_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267691555510379_806_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..124529e9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267691555510379_806_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,280 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((68 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = 
(result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = 
(result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 5, 0, 1600, 5, 0, 2240, 1, 0, 13056, 4, 0, 14208, 1, 0, 15552, 1, 0, 17792, 8, 0, 18752, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267691622231197_807_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267691622231197_807_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..91f015f2
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267691622231197_807_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,192 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Log of (id word, ballot.x, ballot.y) triples; every lane that executes a logged wave op appends one.
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the shared write cursor into _participant_bit, advanced atomically.
+
+[numthreads(4, 1, 1)] // Lane notes below assume the 4 threads are lanes 0-3 of one wave - confirm on target.
+void main(uint3 tid : SV_DispatchThreadID) { // Runs guarded wave ops and records which lanes took part in each; control flow shape is the thing under test.
+  uint result = 0;
+  for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) {
+    if ((WaveGetLaneIndex() >= 3)) { // lane 3 only
+      result = (result + WaveActiveMax(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve three consecutive words; temp receives the previous cursor value.
+      _participant_bit[temp] = ((14 << 6) | (i0 << 4)); // Word 0: op ID 14 in bits 6 and up, loop counter i0 at bit 4.
+      uint4 ballot = WaveActiveBallot(1); // Mask of the lanes active at this point.
+      _participant_bit[(temp + 1)] = ballot.x; // Word 1: low 32 ballot bits.
+      _participant_bit[(temp + 2)] = ballot.y; // Word 2: next 32 ballot bits.
+    }
+    uint counter1 = 0;
+    while ((counter1 < 3)) { // counter1 is 1 then 2; the break at the bottom fires before a third pass
+      counter1 = (counter1 + 1);
+      if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { // lanes 1 and 2
+        result = (result + WaveActiveMax((WaveGetLaneIndex() + 1)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (((38 << 6) | (i0 << 4)) | (counter1 << 2)); // Inner loop counter goes at bit 2.
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if (((WaveGetLaneIndex() & 1) == 0)) { // even lanes (0 and 2)
+        if (((WaveGetLaneIndex() & 1) == 0)) {
+          result = (result + WaveActiveMax(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((52 << 6) | (i0 << 4)) | (counter1 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { // lanes 1, 2 and 3
+        result = (result + WaveActiveMax((WaveGetLaneIndex() + 4)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (((69 << 6) | (i0 << 4)) | (counter1 << 2));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if ((counter1 == 2)) {
+        break;
+      }
+    }
+    if ((i0 == 1)) {
+      break;
+    }
+  }
+  if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { // lanes 1 and 2
+    if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { // of those, only lane 1
+      result = (result + WaveActiveMax(WaveGetLaneIndex()));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (101 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    switch ((WaveGetLaneIndex() % 4)) {
+      case 0: { // not reached: lane 0 is excluded by the enclosing if
+        if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) {
+          if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) {
+            result = (result + WaveActiveSum(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (130 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        } else {
+          if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) {
+            result = (result + WaveActiveSum((WaveGetLaneIndex() + 1)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (147 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        break;
+      }
+      case 1: { // lane 1; the even-lane test below is false for it, so op 156 is never logged
+        if (((WaveGetLaneIndex() % 2) == 0)) {
+          result = (result + WaveActiveSum(2));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (156 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 2: { // lane 2; takes the even branch and logs op 170
+        if (((WaveGetLaneIndex() & 1) == 0)) {
+          if (((WaveGetLaneIndex() & 1) == 0)) {
+            result = (result + WaveActiveMax(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (170 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        } else {
+          if ((WaveGetLaneIndex() == 2)) {
+            result = (result + WaveActiveMin((WaveGetLaneIndex() + 3)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (179 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        break;
+      }
+      case 3: { // not reached: lane 3 is excluded by the enclosing if
+        if ((WaveGetLaneIndex() < 20)) {
+          result = (result + WaveActiveSum(4));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (186 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 96
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [896, 8, 0, 912, 8, 0, 2436, 6, 0, 2436, 6, 0, 2440, 6, 0, 2440, 6, 0, 2452, 6, 0, 2452, 6, 0, 2456, 6, 0, 2456, 6, 0, 3332, 5, 0, 3332, 5, 0, 3336, 5, 0, 3336, 5, 0, 3348, 5, 0, 3348, 5, 0, 3352, 5, 0, 3352, 5, 0, 4420, 14, 0, 4420, 14, 0, 4420, 14, 0, 4424, 14, 0, 4424, 14, 0, 4424, 14, 0, 4436, 14, 0, 4436, 14, 0, 4436, 14, 0, 4440, 14, 0, 4440, 14,
0, 4440, 14, 0, 6464, 2, 0, 10880, 4, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267691749750916_808_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267691749750916_808_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..0a1b4715
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267691749750916_808_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,172 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Log of (id word, ballot.x, ballot.y) triples; every lane that executes a logged wave op appends one.
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the shared write cursor into _participant_bit, advanced atomically.
+
+[numthreads(4, 1, 1)] // Lane notes below assume the 4 threads are lanes 0-3 of one wave - confirm on target.
+void main(uint3 tid : SV_DispatchThreadID) { // Runs guarded wave ops and records which lanes took part in each; control flow shape is the thing under test.
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 3)) { // lanes 0 and 3 -> case 0, lane 1 -> case 1, lane 2 -> case 2
+    case 0: {
+      uint counter0 = 0;
+      while ((counter0 < 3)) { // leaves via the break once counter0 reaches 2
+        counter0 = (counter0 + 1);
+        if ((WaveGetLaneIndex() == 2)) { // lane 2 is not in this case, so op 16 is never logged
+          result = (result + WaveActiveMin(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve three consecutive words; temp receives the previous cursor value.
+          _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); // Word 0: op ID 16 in bits 6 and up, loop counter at bit 4.
+          uint4 ballot = WaveActiveBallot(1); // Mask of the lanes active at this point.
+          _participant_bit[(temp + 1)] = ballot.x; // Word 1: low 32 ballot bits.
+          _participant_bit[(temp + 2)] = ballot.y; // Word 2: next 32 ballot bits.
+        }
+        if ((counter0 == 2)) {
+          break;
+        }
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) { // false for lane 1
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (28 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 2: {
+      if (true) {
+        result = (result + WaveActiveSum(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (33 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    default: { // unreachable: an unsigned value modulo 3 is always 0, 1 or 2
+      result = (result + WaveActiveSum(99));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (37 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+      break;
+    }
+  }
+  switch ((WaveGetLaneIndex() % 3)) {
+    case 0: { // lanes 0 and 3
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (47 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) { // false for lane 1
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (56 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 2: {
+      if (true) {
+        result = (result + WaveActiveSum(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (61 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+  }
+  uint counter1 = 0;
+  while ((counter1 < 3)) { // three passes: counter1 is 1, 2, 3 inside the body
+    counter1 = (counter1 + 1);
+    if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { // lanes 1, 2 and 3
+      result = (result + WaveActiveMin(WaveGetLaneIndex()));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = ((83 << 6) | (counter1 << 4));
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { // lanes 0 and 2
+      result = (result + WaveActiveSum(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = ((98 << 6) | (counter1 << 4));
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 57
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [2112, 4, 0, 3008, 9, 0, 3008, 9, 0, 3904, 4, 0, 5328, 14, 0, 5328, 14, 0, 5328, 14, 0, 5344, 14, 0, 5344, 14, 0, 5344, 14, 0, 5360, 14, 0, 5360, 14, 0, 5360, 14, 0, 6288, 5, 0, 6288, 5, 0, 6304, 5, 0, 6304, 5, 0, 6320, 5, 0, 6320, 5, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267691831101967_809_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267691831101967_809_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..a83fcb9b
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267691831101967_809_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,93 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Log of (id word, ballot.x, ballot.y) triples; every lane that executes a logged wave op appends one.
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the shared write cursor into _participant_bit, advanced atomically.
+
+[numthreads(4, 1, 1)] // Lane notes below assume the 4 threads are lanes 0-3 of one wave - confirm on target.
+void main(uint3 tid : SV_DispatchThreadID) { // Runs guarded wave ops and records which lanes took part in each; control flow shape is the thing under test.
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 2)) {
+    case 0: { // even lanes (0 and 2); each logs op 9 with ballot 0b0101
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve three consecutive words; temp receives the previous cursor value.
+        _participant_bit[temp] = (9 << 6); // Word 0: op ID 9 in bits 6 and up (no loop counters here).
+        uint4 ballot = WaveActiveBallot(1); // Mask of the lanes active at this point.
+        _participant_bit[(temp + 1)] = ballot.x; // Word 1: low 32 ballot bits.
+        _participant_bit[(temp + 2)] = ballot.y; // Word 2: next 32 ballot bits.
+      }
+      break;
+    }
+    case 1: { // odd lanes (1 and 3); the even-lane test below is false for them, so op 18 is never logged
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (18 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    default: { // unreachable: an unsigned value modulo 2 is always 0 or 1
+      result = (result + WaveActiveSum(99));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (22 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 6
+  - Name: expected_bit_patterns
+
Format: UInt32
+    Stride: 4
+    Data: [576, 5, 0, 576, 5, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267691875763229_810_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267691875763229_810_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..16e617f2
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267691875763229_810_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,153 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Log of (id word, ballot.x, ballot.y) triples; every lane that executes a logged wave op appends one.
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the shared write cursor into _participant_bit, advanced atomically.
+
+[numthreads(4, 1, 1)] // Lane notes below assume the 4 threads are lanes 0-3 of one wave - confirm on target.
+void main(uint3 tid : SV_DispatchThreadID) { // Runs guarded wave ops and records which lanes took part in each; control flow shape is the thing under test.
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 2)) {
+    case 0: { // even lanes (0 and 2)
+      switch ((WaveGetLaneIndex() % 3)) { // lane 0 -> case 0, lane 2 -> case 2
+        case 0: {
+          if (((WaveGetLaneIndex() & 1) == 0)) {
+            if (((WaveGetLaneIndex() & 1) == 0)) { // lane 0 alone logs op 19; this is the only triple the test expects
+              result = (result + WaveActiveMax(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve three consecutive words; temp receives the previous cursor value.
+              _participant_bit[temp] = (19 << 6); // Word 0: op ID 19 in bits 6 and up.
+              uint4 ballot = WaveActiveBallot(1); // Mask of the lanes active at this point.
+              _participant_bit[(temp + 1)] = ballot.x; // Word 1: low 32 ballot bits.
+              _participant_bit[(temp + 2)] = ballot.y; // Word 2: next 32 ballot bits.
+            }
+            if (((WaveGetLaneIndex() & 1) == 1)) { // contradicts the enclosing even-lane test; never taken
+              result = (result + WaveActiveMax(5));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (28 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          } else {
+            if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) {
+              result = (result + WaveActiveSum(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (43 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) {
+              result = (result + WaveActiveMin(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (58 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          break;
+        }
+        case 1: {
+          if (((WaveGetLaneIndex() % 2) == 0)) {
+            result = (result + WaveActiveSum(2));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (67 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          break;
+        }
+        case 2: { // lane 2; the lane == 1 guard below is false, so nothing is logged here
+          if ((WaveGetLaneIndex() == 1)) {
+            if ((WaveGetLaneIndex() == 0)) {
+              result = (result + WaveActiveSum(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (77 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            if ((WaveGetLaneIndex() == 2)) {
+              result = (result + WaveActiveMin(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (84 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          break;
+        }
+      }
+      break;
+    }
+    case 1: { // odd lanes (1 and 3); the even-lane test below is false for them
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (93 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 3
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1216, 1, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267692335300321_814_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267692335300321_814_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..60054b37
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267692335300321_814_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,446 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Log of (id word, ballot.x, ballot.y) triples; every lane that executes a logged wave op appends one.
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the shared write cursor into _participant_bit, advanced atomically.
+
+[numthreads(4, 1, 1)] // Lane notes below assume the 4 threads are lanes 0-3 of one wave - confirm on target.
+void main(uint3 tid : SV_DispatchThreadID) { // Runs guarded wave ops and records which lanes took part in each; control flow shape is the thing under test.
+  uint result = 0;
+  if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { // lanes 0, 2 and 3
+    if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { // lanes 0 and 3
+      result = (result + WaveActiveSum(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve three consecutive words; temp receives the previous cursor value.
+      _participant_bit[temp] = (17 << 6); // Word 0: op ID 17 in bits 6 and up.
+      uint4 ballot = WaveActiveBallot(1); // Mask of the lanes active at this point.
+      _participant_bit[(temp + 1)] = ballot.x; // Word 1: low 32 ballot bits.
+      _participant_bit[(temp + 2)] = ballot.y; // Word 2: next 32 ballot bits.
+    }
+    uint counter0 = 0;
+    while ((counter0 < 2)) { // two passes: counter0 is 1 then 2 inside the body
+      counter0 = (counter0 + 1);
+      if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { // lanes 0 and 3
+        result = (result + WaveActiveMax((WaveGetLaneIndex() + 3)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); // Outer loop counter goes at bit 4.
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) {
+        if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { // lanes 0 and 3
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((60 << 6) | (counter0 << 4)) | (i1 << 2)); // Inner loop counter goes at bit 2.
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { // only lane 2: lane 1 is excluded by the outermost if
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((71 << 6) | (counter0 << 4)) | (i1 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((i1 == 1)) {
+          break;
+        }
+      }
+    }
+    if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { // lanes 0 and 3
+      result = (result + WaveActiveSum(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (85 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+  if ((WaveGetLaneIndex() >= 3)) { // lane 3 only; lanes 0-2 take the else branch further down
+    if ((WaveGetLaneIndex() >= 2)) {
+      result = (result + WaveActiveMax(WaveGetLaneIndex()));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (95 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    switch ((WaveGetLaneIndex() % 4)) { // lane 3 selects case 3; cases 0-2 and default are not reached
+      case 0: {
+        switch ((WaveGetLaneIndex() % 3)) {
+          case 0: {
+            if (((WaveGetLaneIndex() & 1) == 1)) {
+              if (((WaveGetLaneIndex() & 1) == 0)) {
+                result = (result + WaveActiveSum((WaveGetLaneIndex() + 4)));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (117 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              if (((WaveGetLaneIndex() & 1) == 0)) {
+                result = (result + WaveActiveMax(result));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (126 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+            } else {
+              if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) {
+                result = (result + WaveActiveSum(result));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (137 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
+                result = (result + WaveActiveMax((WaveGetLaneIndex() + 1)));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (150 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+            }
+            break;
+          }
+          case 1: {
+            if (((WaveGetLaneIndex() % 2) == 0)) {
+              result = (result + WaveActiveSum(2));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (159 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+          case 2: {
+            for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) {
+              if ((WaveGetLaneIndex() == 2)) {
+                result = (result + WaveActiveMin(1));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = ((174 << 6) | (i2 << 4));
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+            }
+            break;
+          }
+        }
+        break;
+      }
+      case 1: {
+        if (((WaveGetLaneIndex() & 1) == 0)) {
+          if (((WaveGetLaneIndex() & 1) == 1)) {
+            result = (result + WaveActiveMax(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (188 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          uint counter3 = 0;
+          while ((counter3 < 3)) {
+            counter3 = (counter3 + 1);
+            if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+              result = (result + WaveActiveMin((WaveGetLaneIndex() + 2)));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = ((208 << 6) | (counter3 << 4));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+              result = (result + WaveActiveSum(2));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = ((219 << 6) | (counter3 << 4));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+        } else {
+          if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (230 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          switch ((WaveGetLaneIndex() % 4)) {
+            case 0: {
+              if ((WaveGetLaneIndex() < 8)) {
+                result = (result + WaveActiveSum(1));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (240 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+            case 1: {
+              if (((WaveGetLaneIndex() % 2) == 0)) {
+                result = (result + WaveActiveSum(2));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (249 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+            case 2: {
+              if (true) {
+                result = (result + WaveActiveSum(3));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (254 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+            case 3: {
+              if ((WaveGetLaneIndex() < 20)) {
+                result = (result + WaveActiveSum(4));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (261 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+          }
+          if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+            result = (result + WaveActiveMin(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (272 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        break;
+      }
+      case 2: {
+        switch ((WaveGetLaneIndex() % 3)) {
+          case 0: {
+            if ((WaveGetLaneIndex() < 8)) {
+              result = (result + WaveActiveSum(1));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (282 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+          case 1: {
+            if (((WaveGetLaneIndex() % 2) == 0)) {
+              result = (result + WaveActiveSum(2));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (291 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+          case 2: {
+            if (true) {
+              result = (result + WaveActiveSum(3));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (296 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+        }
+        break;
+      }
+      case 3: { // lane 3 logs op 303
+        if ((WaveGetLaneIndex() < 20)) {
+          result = (result + WaveActiveSum(4));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (303 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      default: { // unreachable: an unsigned value modulo 4 is always 0-3, all handled above
+        result = (result + WaveActiveSum(99));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (307 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+        break;
+      }
+    }
+    if ((WaveGetLaneIndex() < 1)) { // false for lane 3, the only lane in this branch
+      result = (result + WaveActiveSum(WaveGetLaneIndex()));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (314 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  } else { // lanes 0, 1 and 2
+    if (((WaveGetLaneIndex() & 1) == 0)) { // lanes 0 and 2
+      result = (result + WaveActiveMax(WaveGetLaneIndex()));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (323 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    switch ((WaveGetLaneIndex() % 3)) { // lane 0 -> case 0, lane 1 -> case 1, lane 2 -> case 2
+      case 0: {
+        if ((WaveGetLaneIndex() < 8)) {
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (333 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 1: {
+        if (((WaveGetLaneIndex() % 2) == 0)) { // false for lane 1
+          result = (result + WaveActiveSum(2));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (342 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 2: {
+        if (((WaveGetLaneIndex() & 1) == 0)) {
+          if ((WaveGetLaneIndex() == 2)) {
+            if ((WaveGetLaneIndex() == 1)) { // contradicts the enclosing lane == 2 test; never taken
+              result = (result + WaveActiveMin(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (357 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            if ((WaveGetLaneIndex() == 1)) { // contradicts the enclosing lane == 2 test; never taken
+              result = (result + WaveActiveMin(WaveGetLaneIndex()));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (364 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          if (((WaveGetLaneIndex() & 1) == 0)) { // lane 2 logs op 373
+            result = (result + WaveActiveSum(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (373 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        break;
+      }
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 78
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1088, 9, 0, 1088, 9, 0, 2640, 9, 0, 2640, 9, 0, 2656, 9, 0, 2656, 9, 0, 3856, 9, 0, 3856, 9, 0, 3860, 9, 0, 3860, 9, 0, 3872, 9, 0, 3872, 9, 0, 3876, 9, 0, 3876, 9, 0, 4560, 4, 0, 4564, 4, 0, 4576, 4, 0, 4580, 4, 0, 5440, 9, 0, 5440, 9, 0, 6080, 8, 0, 19392, 8, 0, 20672, 5, 0, 20672, 5, 0, 21312, 1, 0, 23872, 4, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267692471890012_815_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267692471890012_815_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..51d2ea4c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267692471890012_815_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,369 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((260 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((278 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((285 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result 
= (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (290 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 1, 0, 2576, 1, 0, 7376, 1, 0, 10176, 2, 0, 10192, 2, 0, 12928, 2, 0, 12944, 2, 0, 18560, 4, 0, 19008, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267692611297534_817_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267692611297534_817_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3895a223 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267692611297534_817_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,122 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((25 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1616, 13, 0, 1616, 13, 0, 1616, 13, 0, 1620, 13, 0, 1620, 13, 0, 1620, 13, 0, 1624, 13, 0, 1624, 13, 0, 1624, 13, 0, 1632, 13, 0, 1632, 13, 0, 1632, 13, 0, 1636, 13, 0, 1636, 13, 0, 1636, 13, 0, 1640, 13, 0, 1640, 13, 0, 1640, 13, 0, 1648, 13, 0, 1648, 13, 0, 1648, 13, 0, 1652, 13, 0, 1652, 13, 0, 1652, 13, 0, 1656, 13, 0, 1656, 13, 0, 1656, 13, 0, 2256, 9, 0, 2256, 9, 0, 2260, 9, 0, 2260, 9, 0, 2264, 9, 0, 2264, 9, 0, 2272, 9, 0, 2272, 9, 0, 2276, 9, 0, 2276, 9, 0, 2280, 9, 0, 2280, 9, 0, 2288, 9, 0, 2288, 9, 0, 2292, 9, 0, 2292, 9, 0, 2296, 9, 0, 2296, 9, 0, 3152, 4, 0, 3156, 4, 0, 3160, 4, 0, 3168, 4, 0, 3172, 4, 0, 3176, 4, 0, 3184, 4, 0, 3188, 4, 0, 3192, 4, 0, 3856, 9, 0, 3856, 9, 0, 3860, 9, 0, 3860, 9, 0, 3864, 9, 0, 3864, 9, 0, 3872, 9, 0, 3872, 9, 0, 3876, 9, 0, 3876, 9, 0, 3880, 9, 0, 3880, 9, 0, 3888, 9, 0, 3888, 9, 0, 3892, 9, 0, 3892, 9, 0, 3896, 9, 0, 3896, 9, 0] + - Name: _wave_op_index + Format: UInt32 + 
Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267693762641747_819_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267693762641747_819_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..468202bf --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267693762641747_819_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,170 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((i1 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2496, 10, 0, 2496, 10, 0, 2512, 10, 0, 2512, 10, 0, 3072, 10, 0, 3072, 10, 0, 3088, 10, 0, 3088, 10, 0, 3776, 10, 0, 3776, 10, 0, 3792, 10, 0, 3792, 10, 0, 5312, 8, 0, 5328, 8, 0, 6464, 13, 0, 6464, 13, 0, 6464, 13, 0, 6480, 13, 0, 6480, 13, 0, 6480, 13, 0, 7104, 5, 0, 7104, 5, 0, 7120, 5, 0, 7120, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + 
Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267693947020415_821_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267693947020415_821_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..731bd524 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267693947020415_821_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,204 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((104 << 6) | (counter0 << 4)) | (counter1 << 2)) | i2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = ((((119 << 6) | (counter0 << 4)) | (counter1 << 2)) | i2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((138 << 6) | (counter0 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (counter0 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((175 << 6) | (counter0 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((191 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 183 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3200, 4, 0, 2944, 3, 0, 2944, 3, 0, 2688, 8, 0, 5204, 1, 0, 5208, 1, 0, 5212, 1, 0, 5220, 1, 0, 5224, 1, 0, 5228, 1, 0, 5236, 1, 0, 5240, 1, 0, 5244, 1, 0, 8848, 4, 0, 8852, 4, 0, 8856, 4, 0, 8864, 4, 0, 8868, 4, 0, 8872, 4, 0, 8880, 4, 0, 8884, 4, 0, 8888, 4, 0, 10512, 6, 0, 10512, 6, 0, 10516, 6, 0, 10516, 6, 0, 10520, 6, 0, 10520, 6, 0, 10528, 6, 0, 10528, 6, 0, 10532, 6, 0, 10532, 6, 0, 10536, 6, 0, 10536, 6, 0, 10544, 6, 0, 10544, 6, 0, 10548, 6, 0, 10548, 6, 0, 10552, 6, 0, 10552, 6, 0, 11216, 10, 0, 11216, 10, 0, 11220, 10, 0, 11220, 10, 0, 11224, 10, 0, 11224, 10, 0, 11232, 10, 0, 11232, 10, 0, 11236, 10, 0, 11236, 10, 0, 11240, 10, 0, 11240, 10, 0, 11248, 10, 0, 11248, 10, 0, 11252, 10, 0, 11252, 10, 0, 11256, 10, 0, 11256, 10, 0, 12240, 8, 0, 12256, 8, 0, 12272, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267694393299416_822_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267694393299416_822_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a67a60a4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267694393299416_822_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,149 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + 
if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + 
Data: [976, 8, 0, 992, 8, 0, 2128, 1, 0, 2144, 1, 0, 2768, 1, 0, 2784, 1, 0, 4112, 8, 0, 4128, 8, 0, 4816, 1, 0, 4832, 1, 0, 5264, 12, 0, 5264, 12, 0, 5280, 12, 0, 5280, 12, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267694467906413_823_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267694467906413_823_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5495002b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267694467906413_823_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,110 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1744, 2, 0, 1760, 2, 0, 1776, 2, 0, 3264, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267694521015870_824_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267694521015870_824_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c89a082e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267694521015870_824_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,157 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2304, 1, 0, 2320, 1, 0, 3264, 9, 0, 3264, 9, 0, 3280, 9, 0, 3280, 9, 0, 4800, 1, 0, 4816, 1, 0, 5376, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267694660262561_826_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267694660262561_826_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2882065f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267694660262561_826_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,123 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if ((i1 == 1)) { + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5188, 2, 0, 5204, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267694715018606_827_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267694715018606_827_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..77eb312b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267694715018606_827_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,240 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint 
counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((94 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((101 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 3072, 4, 0, 3520, 8, 0, 5136, 8, 0, 5152, 8, 0, 6036, 8, 0, 6052, 8, 0, 6484, 8, 0, 6500, 8, 0, 7552, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267694804017245_828_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267694804017245_828_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e339ebc3 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267694804017245_828_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 4, 0, 1920, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267694863027716_829_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267694863027716_829_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f18d94f2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267694863027716_829_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,764 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((190 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((199 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((214 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((231 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((238 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((247 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((274 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((284 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((291 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((321 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (330 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (340 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (349 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (354 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (361 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (370 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (386 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((405 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((418 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (430 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (439 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (444 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((476 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (487 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (505 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (519 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (530 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i8 = 0; (i8 < 2); i8 = (i8 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((545 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i8 == 1)) { + continue; + } + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (568 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i9 = 0; (i9 < 2); i9 = (i9 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((585 << 6) | (i9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i10 = 0; (i10 < 2); i10 = (i10 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((608 << 6) | (i9 << 4)) | (i10 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i10 == 1)) { + continue; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (622 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (631 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter11 = 0; + while ((counter11 < 3)) { + counter11 = (counter11 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((645 << 6) | (counter11 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i12 = 0; (i12 < 3); i12 = (i12 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((664 << 6) | (counter11 << 4)) | (i12 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((671 << 6) | (counter11 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (680 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (693 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter13 = 0; + while ((counter13 < 3)) { + counter13 = (counter13 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((715 << 6) | (counter13 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (722 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (731 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i14 = 0; (i14 < 2); i14 = (i14 + 1)) { + for (uint i15 = 0; (i15 < 2); i15 = (i15 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((762 << 6) | (i14 << 4)) | (i15 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i15 == 1)) { + continue; + } + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (769 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2560, 1, 0, 2576, 1, 0, 2592, 1, 0, 4736, 2, 0, 10816, 4, 0, 10832, 4, 0, 10848, 4, 0, 15808, 4, 0, 15824, 4, 0, 15840, 4, 0, 16448, 8, 0, 37440, 4, 0, 37456, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267695140751384_830_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267695140751384_830_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..32531862 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267695140751384_830_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,188 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 
1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((122 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((127 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 1, 0, 1104, 1, 0, 1120, 1, 0, 4288, 4, 0, 4304, 4, 0, 4320, 4, 0, 5504, 8, 0, 5520, 8, 0, 7232, 8, 0, 7236, 8, 0, 7248, 8, 0, 7252, 8, 0, 8832, 8, 0, 8848, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267695291330915_831_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267695291330915_831_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..44dfe395 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267695291330915_831_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,356 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
(((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 1)) { + if 
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((249 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + for (uint i2 = 0; (i2 
< 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((279 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((294 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((309 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((325 << 6) | (i1 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((334 << 6) | (i1 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
+ if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((349 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((356 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (374 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 57 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 9, 0, 1088, 9, 0, 4800, 9, 0, 4800, 9, 0, 5952, 9, 0, 5952, 9, 0, 14976, 13, 0, 14976, 13, 0, 14976, 13, 0, 17856, 2, 0, 17860, 2, 0, 17864, 2, 0, 17872, 2, 0, 17876, 2, 0, 17880, 2, 0, 17888, 2, 0, 17892, 2, 0, 17896, 2, 0, 23936, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267695375775910_832_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267695375775910_832_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..49737b7d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267695375775910_832_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* participation log: one 3-uint record (tag, ballot.x, ballot.y) per lane per wave op */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free index in _participant_bit */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Each lane enters at most one of the three leaves below; a leaf runs one wave op, reserves 3 slots via InterlockedAdd, then stores (op id << 6) and the first two words of WaveActiveBallot(1). */ + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: +
- Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2048, 6, 0, 2048, 6, 0, 1792, 8, 0, 1536, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267695424156744_833_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267695424156744_833_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f5ef4df7 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267695424156744_833_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,366 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* participation log: one 3-uint record (tag, ballot.x, ballot.y) per lane per wave op */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free index in _participant_bit */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Lanes are routed by WaveGetLaneIndex(); every leaf that runs a wave op reserves 3 slots via InterlockedAdd and appends one record. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0;
+ while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + 
} + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if 
((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (327 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (332 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 51 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 4688, 2, 0, 4704, 2, 0, 4720, 2, 0, 5632, 1, 0, 8896, 2, 0, 10256, 1, 0, 10272, 1, 0, 10288, 1, 0, 10688, 1, 0, 13120, 4, 0, 13568, 8, 0, 15424, 1, 0, 20224, 2, 0, 20928, 2, 0, 21248, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267695528728615_834_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267695528728615_834_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0b5745cf --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267695528728615_834_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,268 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* participation log: one 3-uint record (tag, ballot.x, ballot.y) per lane per wave op */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free index in _participant_bit */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Lanes are routed by WaveGetLaneIndex(); every leaf that runs a wave op reserves 3 slots via InterlockedAdd and appends one record. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; +
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 
<< 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 1)) { + if 
((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 8896, 2, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267695591400097_835_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267695591400097_835_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..80441289 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267695591400097_835_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,389 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* participation log: one 3-uint record (tag, ballot.x, ballot.y) per lane per wave op */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free index in _participant_bit */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Lanes are routed by WaveGetLaneIndex(); every leaf that runs a wave op reserves 3 slots via InterlockedAdd and appends one record whose tag is (op id << 6) OR-ed with shifted loop counters. */ + uint result = 0; + if ((WaveGetLaneIndex() < 1)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); +
_participant_bit[temp] = ((33 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = 
(result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((118 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((133 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((218 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((235 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((254 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (294 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((312 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (327 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [9088, 8, 0, 9104, 8, 0, 10176, 1, 0, 11344, 2, 0, 11360, 2, 0, 11376, 2, 0, 13968, 2, 0, 13984, 2, 0, 14000, 2, 0, 15040, 4, 0, 15056, 4, 0, 15072, 4, 0, 16256, 4, 0, 16260, 4, 0, 16264, 4, 0, 16272, 4, 0, 16276, 4, 0, 16280, 4, 0, 16288, 4, 0, 16292, 4, 0, 16296, 4, 0, 17280, 1, 0, 17920, 1, 0, 18816, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + 
Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267695818799915_836_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267695818799915_836_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9b8005d5 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267695818799915_836_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,270 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(((40 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((191 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # 
Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 135 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1168, 8, 0, 1184, 8, 0, 2580, 9, 0, 2580, 9, 0, 2584, 9, 0, 2584, 9, 0, 2588, 9, 0, 2588, 9, 0, 2596, 9, 0, 2596, 9, 0, 2600, 9, 0, 2600, 9, 0, 2604, 9, 0, 2604, 9, 0, 3540, 9, 0, 3540, 9, 0, 3544, 9, 0, 3544, 9, 0, 3548, 9, 0, 3548, 9, 0, 3556, 9, 0, 3556, 9, 0, 3560, 9, 0, 3560, 9, 0, 3564, 9, 0, 3564, 9, 0, 3984, 8, 0, 4000, 8, 0, 4864, 4, 0, 5760, 1, 0, 6336, 1, 0, 7888, 2, 0, 7892, 2, 0, 7896, 2, 0, 7904, 2, 0, 7908, 2, 0, 7912, 2, 0, 7920, 2, 0, 7924, 2, 0, 7928, 2, 0, 8336, 1, 0, 8352, 1, 0, 8368, 1, 0, 10368, 8, 0, 11072, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267696159316730_838_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267696159316730_838_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..caa86b21 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267696159316730_838_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,121 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5824, 9, 0, 5824, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267696210521702_839_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267696210521702_839_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bedf5ee9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267696210521702_839_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,101 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3072, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267696488972056_841_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267696488972056_841_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c234d33c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267696488972056_841_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,236 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) 
{ + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 13, 0, 1088, 13, 0, 1088, 13, 0, 2304, 9, 0, 2304, 9, 0, 2320, 9, 0, 2320, 9, 0, 2336, 9, 0, 2336, 9, 0, 3456, 9, 0, 3456, 9, 0, 3472, 9, 0, 3472, 9, 0, 3488, 9, 0, 3488, 9, 0, 4160, 13, 0, 4160, 13, 0, 4160, 13, 0, 4176, 13, 0, 4176, 13, 0, 4176, 13, 0, 4192, 13, 0, 4192, 13, 0, 4192, 13, 0, 5248, 9, 0, 5248, 9, 0, 6656, 1, 0, 6672, 1, 0, 6688, 1, 0, 8256, 2, 0, 9344, 2, 0, 9360, 2, 0, 9376, 2, 0, 9664, 4, 0, 11024, 8, 0, 11040, 8, 0, 14144, 13, 0, 14144, 13, 0, 14144, 13, 0, 13888, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267696701885823_843_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267696701885823_843_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..17885f1a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267696701885823_843_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,199 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 
2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (counter2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 5888, 1, 0, 9104, 4, 0, 9728, 12, 0, 9728, 12, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267696753578041_844_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267696753578041_844_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..492e957d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267696753578041_844_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,286 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (i0 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((178 << 6) | (i0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((193 << 6) | (i0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((244 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((251 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((266 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((278 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((285 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((311 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 9, 0, 1152, 9, 0, 1168, 9, 0, 1168, 9, 0, 1184, 9, 0, 1184, 9, 0, 2304, 9, 0, 2304, 9, 0, 2320, 9, 0, 2320, 9, 0, 2336, 9, 0, 2336, 9, 0, 3776, 1, 0, 3792, 1, 0, 3808, 1, 0, 6980, 8, 0, 6984, 8, 0, 6988, 8, 0, 6996, 8, 0, 7000, 8, 0, 7004, 8, 0, 7012, 8, 0, 7016, 8, 0, 7020, 8, 0, 7680, 9, 0, 7680, 9, 0, 7696, 9, 0, 7696, 9, 0, 7712, 9, 0, 7712, 9, 0, 8256, 4, 0, 
8272, 4, 0, 8288, 4, 0, 9920, 2, 0, 9936, 2, 0, 9952, 2, 0, 11392, 2, 0, 11396, 2, 0, 11408, 2, 0, 11412, 2, 0, 11424, 2, 0, 11428, 2, 0, 12928, 4, 0, 12944, 4, 0, 12960, 4, 0, 14096, 6, 0, 14096, 6, 0, 17040, 9, 0, 17040, 9, 0, 19216, 2, 0, 19920, 9, 0, 19920, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267696981232565_845_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267696981232565_845_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c08839a6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267696981232565_845_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,310 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 3)) { + 
counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | 
(i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((196 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 
1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 4096, 2, 0, 4112, 2, 0, 4416, 4, 0, 5312, 9, 0, 5312, 9, 0, 6080, 2, 0, 13392, 1, 0, 13408, 1, 0, 16640, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267697074765524_846_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267697074765524_846_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c0b0f795 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267697074765524_846_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,181 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267697135301325_847_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267697135301325_847_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a83653c2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267697135301325_847_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,176 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
+ } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1216, 1, 0, 2112, 4, 0, 2560, 8, 0, 3904, 1, 0, 5760, 1, 0, 5776, 1, 0, 5792, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267697307048369_849_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267697307048369_849_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1635ad3c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267697307048369_849_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,188 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2752, 5, 0, 2752, 5, 0, 2496, 2, 0, 1728, 8, 0, 3792, 10, 0, 3792, 10, 0, 3808, 10, 0, 3808, 10, 0, 3824, 10, 0, 3824, 10, 0, 4624, 1, 0, 4640, 1, 0, 4656, 1, 0, 6800, 8, 0, 6816, 8, 0, 6832, 8, 0, 8016, 10, 0, 8016, 10, 0, 8032, 10, 0, 8032, 10, 0, 8048, 10, 0, 8048, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267697562928480_852_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267697562928480_852_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9a2a1977 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267697562928480_852_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,262 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 
1) == 0)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (266 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1808, 5, 0, 1808, 5, 0, 1824, 5, 0, 1824, 5, 0, 4176, 5, 0, 4176, 5, 0, 4192, 5, 0, 4192, 5, 0, 4992, 8, 0, 7232, 8, 0, 10624, 4, 0, 10640, 4, 0, 10656, 4, 0, 11584, 4, 0, 11600, 4, 0, 11616, 4, 0, 13504, 1, 0, 14848, 2, 0, 15424, 10, 0, 15424, 10, 0, 17280, 1, 0, 17024, 2, 0, 16640, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267697663662900_853_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267697663662900_853_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cf7b86e9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267697663662900_853_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,174 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3792, 1, 0, 3808, 1, 0, 3824, 1, 0, 4672, 2, 0, 5760, 2, 0, 5776, 2, 0, 6656, 4, 0, 7104, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267697721529003_854_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267697721529003_854_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..52007297 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267697721529003_854_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,243 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((107 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((117 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((126 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((131 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((138 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((142 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((151 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 9, 0, 768, 9, 0, 6852, 2, 0, 6856, 2, 0, 6860, 2, 0, 6868, 2, 0, 6872, 2, 0, 6876, 2, 0, 9668, 2, 0, 9672, 2, 0, 9676, 2, 0, 9684, 2, 0, 9688, 2, 0, 9692, 2, 0, 9984, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267697798832990_855_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267697798832990_855_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cb26cd29 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267697798832990_855_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,264 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 1, 0, 8272, 8, 0, 8288, 8, 0, 8896, 5, 0, 8896, 5, 0, 10368, 9, 0, 10368, 9, 0, 11264, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267697871780908_856_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267697871780908_856_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3f4dba36 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267697871780908_856_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,271 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + case 3: { + if 
((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + 
counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((262 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 
1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 87 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 1, 0, 1360, 1, 0, 7296, 4, 0, 7312, 4, 0, 8320, 14, 0, 8320, 14, 0, 8320, 14, 0, 9680, 8, 0, 9696, 8, 0, 10512, 4, 0, 10528, 4, 0, 11136, 8, 0, 12288, 9, 0, 12288, 9, 0, 13328, 1, 0, 13344, 1, 0, 13360, 1, 0, 14352, 8, 0, 14368, 8, 0, 14384, 8, 0, 15504, 8, 0, 15520, 8, 0, 15536, 8, 0, 16208, 8, 0, 16224, 8, 0, 16240, 8, 0, 16784, 1, 0, 16800, 1, 0, 16816, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267697987625192_857_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267697987625192_857_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..28ecc083 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267697987625192_857_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,475 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if 
((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || 
(WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((324 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((341 << 6) | (i3 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (356 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (373 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (383 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (393 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (400 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() 
== 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (415 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (420 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((444 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((453 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((463 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = 
(result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (473 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (477 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1216, 8, 0, 2640, 8, 0, 2656, 8, 0, 4816, 8, 0, 4832, 8, 0, 5440, 8, 0, 17280, 5, 0, 17280, 5, 0, 23872, 1, 0, 30272, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267698083883171_858_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267698083883171_858_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d539d7cc --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267698083883171_858_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,252 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = 
(result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (i0 << 4)); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 2240, 2, 0, 3520, 7, 0, 3520, 7, 0, 3520, 7, 0, 3968, 15, 0, 3968, 15, 0, 3968, 15, 0, 3968, 15, 0, 5120, 9, 0, 5120, 9, 0, 7168, 8, 0, 7184, 8, 0, 7744, 8, 0, 7760, 8, 0, 8192, 8, 0, 8208, 8, 0, 8640, 8, 0, 9344, 9, 0, 9344, 9, 0, 11584, 6, 0, 11584, 6, 0, 12032, 6, 0, 12032, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + 
Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267698242865610_860_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267698242865610_860_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bf40bc0c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267698242865610_860_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,307 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 
1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((247 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((265 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((276 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2560, 5, 0, 2560, 5, 0, 2576, 5, 0, 2576, 5, 0, 5824, 1, 0, 5840, 1, 0, 6528, 4, 0, 6544, 4, 0, 7568, 2, 0, 7584, 2, 0, 7600, 2, 0, 8656, 1, 0, 8672, 1, 0, 9808, 1, 0, 9824, 1, 0, 16980, 4, 0, 16984, 4, 0, 16988, 4, 0, 16996, 4, 0, 17000, 4, 0, 17004, 4, 0, 17684, 4, 0, 17688, 4, 0, 17692, 4, 0, 17700, 4, 0, 17704, 4, 0, 17708, 4, 0, 18112, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + 
Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267698372598022_861_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267698372598022_861_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bd8fbe90 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267698372598022_861_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,157 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + 
Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1664, 1, 0, 4928, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267698428997423_862_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267698428997423_862_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7f465f7d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267698428997423_862_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,331 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if 
((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((244 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: 
UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2240, 1, 0, 3968, 4, 0, 7424, 1, 0, 7440, 1, 0, 11264, 1, 0, 14016, 7, 0, 14016, 7, 0, 14016, 7, 0, 16192, 8, 0, 16208, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267698533541654_863_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267698533541654_863_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ec1bb60e --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267698533541654_863_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,245 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((76 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (counter0 << 
4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((97 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 
4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2752, 1, 0, 4880, 2, 0, 4884, 2, 0, 4896, 2, 0, 4900, 2, 0, 4912, 2, 0, 4916, 2, 0, 6224, 2, 0, 6228, 2, 0, 6240, 2, 0, 6244, 2, 0, 6256, 2, 0, 6260, 2, 0, 6976, 4, 0, 10432, 8, 0, 10448, 8, 0, 11136, 8, 0, 11152, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267698634906787_864_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267698634906787_864_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4b0f44ed --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267698634906787_864_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,138 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
+ if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 2704, 4, 0, 2720, 4, 0, 2736, 4, 0, 6736, 4, 0, 6752, 4, 0, 6768, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267698868879229_867_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267698868879229_867_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..abb617b6 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267698868879229_867_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,123 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 9, 0, 1088, 9, 0, 2256, 9, 0, 2256, 9, 0, 3408, 9, 0, 3408, 9, 0, 4816, 9, 0, 4816, 9, 0, 5696, 9, 0, 5696, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267698929722584_868_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267698929722584_868_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bd4af27a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267698929722584_868_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,694 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((112 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 
3))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((162 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((172 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((181 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((185 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((267 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((297 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (304 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((333 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((347 << 6) | (counter6 << 4)) | (counter7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((354 << 6) | (counter6 << 4)) | (counter7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter7 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((364 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter6 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (382 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (387 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (394 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (398 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (415 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (426 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (435 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 3)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((453 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (464 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (473 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (493 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (507 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter9 = 0; + while ((counter9 < 2)) { + counter9 = (counter9 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((523 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((530 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (544 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (554 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (563 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (568 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (575 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (584 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (591 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i10 = 0; (i10 < 3); i10 = (i10 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((606 << 6) | (i10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter11 = 0; + while ((counter11 < 2)) { + counter11 = (counter11 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((622 << 6) | (i10 << 4)) | (counter11 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter11 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((634 << 6) | (i10 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (641 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (645 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 8, 0, 10372, 1, 0, 10376, 1, 0, 10380, 1, 0, 10388, 1, 0, 10392, 1, 0, 10396, 1, 0, 11012, 1, 0, 11016, 1, 0, 11020, 1, 0, 11028, 1, 0, 11032, 1, 0, 11036, 1, 0, 20416, 2, 0, 24768, 4, 0, 25216, 8, 0, 27264, 1, 0, 31552, 4, 0, 33936, 4, 0, 33952, 4, 0, 34816, 8, 0, 36800, 8, 0, 37376, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + 
Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267699170734446_869_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267699170734446_869_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2893ca88 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267699170734446_869_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,253 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((127 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + 
result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((134 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((143 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((152 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if 
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((210 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((225 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 207 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 9, 0, 1088, 9, 0, 2240, 9, 0, 2240, 9, 0, 3408, 9, 0, 3408, 9, 0, 3424, 9, 0, 3424, 9, 0, 3440, 9, 0, 3440, 9, 0, 4544, 5, 0, 4544, 5, 0, 5968, 5, 0, 5968, 5, 0, 5984, 5, 0, 5984, 5, 0, 6000, 5, 0, 6000, 5, 0, 6996, 5, 0, 6996, 5, 0, 7000, 5, 0, 7000, 5, 0, 7012, 5, 0, 7012, 5, 0, 7016, 5, 0, 7016, 5, 0, 7028, 5, 0, 7028, 5, 0, 7032, 5, 0, 7032, 5, 0, 9748, 5, 0, 9748, 5, 0, 9752, 5, 0, 9752, 5, 0, 9764, 5, 0, 9764, 5, 0, 9768, 5, 0, 9768, 5, 0, 9780, 5, 0, 9780, 5, 0, 9784, 5, 0, 9784, 5, 0, 10704, 1, 0, 10720, 1, 0, 10736, 1, 0, 11136, 8, 0, 13460, 8, 0, 13464, 8, 0, 13468, 8, 0, 13476, 8, 0, 13480, 8, 0, 13484, 8, 0, 13492, 8, 0, 13496, 8, 0, 13500, 8, 0, 14420, 8, 0, 14424, 8, 0, 14428, 8, 0, 14436, 8, 0, 14440, 8, 0, 14444, 8, 0, 14452, 8, 0, 14456, 8, 0, 14460, 8, 0, 15120, 8, 0, 15136, 8, 0, 15152, 8, 0, 17152, 5, 0, 17152, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267699456460135_870_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267699456460135_870_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..9a7c87f2
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267699456460135_870_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,206 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-word records: tag, ballot.x, ballot.y
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the shared write cursor, advanced by 3 per record
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // tid is not read; lanes are told apart via WaveGetLaneIndex()
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 3)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) {
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // temp receives the cursor value before the add
+      _participant_bit[temp] = (9 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 1: {
+    if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
+      if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) {
+        result = (result + WaveActiveMax(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (27 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) {
+        if (((WaveGetLaneIndex() & 1) == 1)) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((44 << 6) | (i0 << 4)); // loop index is folded into the tag
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) {
+        result = (result + WaveActiveMin(6));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (55 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    } else {
+      if ((WaveGetLaneIndex() < 2)) {
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (62 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if (((WaveGetLaneIndex() & 1) == 0)) {
+        if (((WaveGetLaneIndex() & 1) == 0)) {
+          result = (result + WaveActiveSum(3));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (76 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if ((WaveGetLaneIndex() >= 3)) {
+        result = (result + WaveActiveMin(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (83 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  case 2: {
+    switch ((WaveGetLaneIndex() % 3)) {
+    case 0: {
+      if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) {
+        if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) {
+          result = (result + WaveActiveMax(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (112 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    case 1: {
+      if ((WaveGetLaneIndex() == 3)) {
+        if ((WaveGetLaneIndex() == 3)) {
+          result = (result + WaveActiveSum((WaveGetLaneIndex() + 2)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (124 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    case 2: {
+      if (true) {
+        result = (result + WaveActiveSum(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (129 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    }
+    break;
+  }
+  }
+  switch ((WaveGetLaneIndex() % 2)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) {
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (139 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 1: {
+    if (((WaveGetLaneIndex() % 2) == 0)) {
+      result = (result + WaveActiveSum(2));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (148 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 21
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [576, 9, 0, 576, 9, 0, 2816, 2, 0, 2832, 2, 0, 8256, 4, 0, 8896, 5, 0, 8896, 5, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267699520477630_871_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267699520477630_871_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..e7dcec36
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267699520477630_871_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,157 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-word records: tag, ballot.x, ballot.y
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the shared write cursor, advanced by 3 per record
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // tid is not read; lanes are told apart via WaveGetLaneIndex()
+  uint result = 0;
+  if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
+    if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
+      result = (result + WaveActiveSum(WaveGetLaneIndex()));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // temp receives the cursor value before the add
+      _participant_bit[temp] = (17 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    uint counter0 = 0;
+    while ((counter0 < 2)) {
+      counter0 = (counter0 + 1); // incremented first, so tags below see 1 and 2
+      for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) {
+        if ((WaveGetLaneIndex() >= 2)) {
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((39 << 6) | (counter0 << 4)) | (i1 << 2)); // both loop counters are folded into the tag
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((WaveGetLaneIndex() >= 3)) {
+          result = (result + WaveActiveMax((WaveGetLaneIndex() + 1)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((48 << 6) | (counter0 << 4)) | (i1 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((i1 == 1)) {
+          break;
+        }
+      }
+      if ((WaveGetLaneIndex() == 3)) {
+        result = (result + WaveActiveMin(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((58 << 6) | (counter0 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+  }
+  switch ((WaveGetLaneIndex() % 2)) {
+  case 0: {
+    uint counter2 = 0;
+    while ((counter2 < 2)) {
+      counter2 = (counter2 + 1);
+      for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) {
+        if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((87 << 6) | (counter2 << 4)) | (i3 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((98 << 6) | (counter2 << 4)) | (i3 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((i3 == 1)) {
+          continue;
+        }
+      }
+      if (((WaveGetLaneIndex() & 1) == 1)) {
+        result = (result + WaveActiveMin(5));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((110 << 6) | (counter2 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  case 1: {
+    if (((WaveGetLaneIndex() % 2) == 0)) {
+      result = (result + WaveActiveSum(2));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (119 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 90
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1088, 10, 0, 1088, 10, 0, 2512, 8, 0, 2516, 8, 0, 2528, 8, 0, 2532, 8, 0, 3088, 8, 0, 3092, 8, 0, 3104, 8, 0, 3108, 8, 0, 3728, 8, 0, 3744, 8, 0, 5584, 1, 0, 5588, 1, 0, 5592, 1, 0, 5600, 1, 0, 5604, 1, 0, 5608, 1, 0, 6288, 5, 0, 6288, 5, 0, 6292, 5, 0, 6292, 5, 0, 6296, 5, 0, 6296, 5, 0, 6304, 5, 0, 6304, 5, 0, 6308, 5, 0, 6308, 5, 0, 6312, 5, 0, 6312, 5, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267699679241773_872_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267699679241773_872_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..6619822b
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267699679241773_872_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,236 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-word records: tag, ballot.x, ballot.y
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the shared write cursor, advanced by 3 per record
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // tid is not read; lanes are told apart via WaveGetLaneIndex()
+  uint result = 0;
+  if (((WaveGetLaneIndex() & 1) == 0)) {
+    if (((WaveGetLaneIndex() & 1) == 0)) {
+      result = (result + WaveActiveMax((WaveGetLaneIndex() + 2)));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // temp receives the cursor value before the add
+      _participant_bit[temp] = (15 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    switch ((WaveGetLaneIndex() % 4)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (25 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: {
+      uint counter0 = 0;
+      while ((counter0 < 2)) {
+        counter0 = (counter0 + 1);
+        uint counter1 = 0;
+        while ((counter1 < 3)) {
+          counter1 = (counter1 + 1);
+          if ((WaveGetLaneIndex() >= 2)) {
+            result = (result + WaveActiveMin((WaveGetLaneIndex() + 4)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((48 << 6) | (counter0 << 4)) | (counter1 << 2)); // both loop counters are folded into the tag
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((WaveGetLaneIndex() >= 3)) {
+            result = (result + WaveActiveMax(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((55 << 6) | (counter0 << 4)) | (counter1 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((66 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((counter0 == 1)) {
+          break;
+        }
+      }
+      break;
+    }
+    case 2: {
+      if (true) {
+        result = (result + WaveActiveSum(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (74 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 3: {
+      if ((WaveGetLaneIndex() < 20)) {
+        result = (result + WaveActiveSum(4));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (81 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    }
+  }
+  switch ((WaveGetLaneIndex() % 3)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) {
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (91 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  } // no break: control continues into case 1
+  case 1: {
+    uint counter2 = 0;
+    while ((counter2 < 2)) {
+      counter2 = (counter2 + 1);
+      if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) {
+        result = (result + WaveActiveMax(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((113 << 6) | (counter2 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if ((WaveGetLaneIndex() == 2)) {
+        uint counter3 = 0;
+        while ((counter3 < 2)) {
+          counter3 = (counter3 + 1);
+          if ((WaveGetLaneIndex() == 2)) {
+            result = (result + WaveActiveSum((WaveGetLaneIndex() + 2)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((132 << 6) | (counter2 << 4)) | (counter3 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((WaveGetLaneIndex() == 0)) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((139 << 6) | (counter2 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if ((counter2 == 1)) {
+        break;
+      }
+    }
+  } // no break: control continues into case 2
+  case 2: {
+    if ((WaveGetLaneIndex() == 3)) {
+      if ((WaveGetLaneIndex() == 2)) {
+        result = (result + WaveActiveMax(WaveGetLaneIndex()));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (152 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) {
+        if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (178 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if ((WaveGetLaneIndex() == 1)) {
+        result = (result + WaveActiveMin(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (185 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 24
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [960, 5, 0, 960, 5, 0, 1600, 1, 0, 4736, 4, 0, 5824, 9, 0, 5824, 9, 0, 7248, 2, 0, 11392, 8, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267699749548520_873_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267699749548520_873_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..6bf7704c
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267699749548520_873_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,175 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-word records: tag, ballot.x, ballot.y
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the shared write cursor, advanced by 3 per record
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // tid is not read; lanes are told apart via WaveGetLaneIndex()
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 4)) {
+  case 0: {
+    for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) {
+      if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
+        result = (result + WaveActiveMin(WaveGetLaneIndex()));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp); // temp receives the cursor value before the add
+        _participant_bit[temp] = ((21 << 6) | (i0 << 4)); // loop index is folded into the tag
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if ((WaveGetLaneIndex() < 2)) {
+        if ((WaveGetLaneIndex() >= 2)) {
+          result = (result + WaveActiveMin(5));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((31 << 6) | (i0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((WaveGetLaneIndex() >= 2)) {
+          result = (result + WaveActiveMax(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((38 << 6) | (i0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if ((i0 == 1)) {
+        continue;
+      }
+    }
+  } // no break: control continues into case 1
+  case 1: {
+    if (((WaveGetLaneIndex() % 2) == 0)) {
+      result = (result + WaveActiveSum(2));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (50 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  } // no break: control continues into case 2
+  case 2: {
+    if (true) {
+      result = (result + WaveActiveSum(3));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (55 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  } // no break: control continues into case 3
+  case 3: {
+    uint counter1 = 0;
+    while ((counter1 < 2)) {
+      counter1 = (counter1 + 1);
+      if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) {
+        result = (result + WaveActiveMin(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((73 << 6) | (counter1 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if ((WaveGetLaneIndex() == 0)) {
+        if ((WaveGetLaneIndex() == 1)) {
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((83 << 6) | (counter1 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((WaveGetLaneIndex() == 1)) {
+          result = (result + WaveActiveMax((WaveGetLaneIndex() + 5)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((92 << 6) | (counter1 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      } else {
+        if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) {
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((107 << 6) | (counter1 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((118 << 6) | (counter1 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if ((counter1 == 1)) {
+        break;
+      }
+    }
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 30
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [3200, 1, 0, 3520, 7, 0, 3520, 7, 0, 3520, 7, 0, 4688, 5, 0, 4688, 5, 0, 6864, 6, 0, 6864, 6, 0, 7568, 10, 0, 7568, 10, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267699938258398_875_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267699938258398_875_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..d0cc9672
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267699938258398_875_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,119 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-word records: tag, ballot.x, ballot.y
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the shared write cursor, advanced by 3 per record
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // tid is not read; lanes are told apart via WaveGetLaneIndex()
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 4)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) {
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // temp receives the cursor value before the add
+      _participant_bit[temp] = (9 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 1: {
+    for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) {
+      if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) {
+        result = (result + WaveActiveSum(7));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((28 << 6) | (i0 << 4)); // loop index is folded into the tag
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  case 2: {
+    if (true) {
+      result = (result + WaveActiveSum(3));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (33 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 3: {
+    if ((WaveGetLaneIndex() < 20)) {
+      result = (result + WaveActiveSum(4));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (40 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  default: {
+    result = (result + WaveActiveSum(99));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp);
+    _participant_bit[temp] = (44 << 6);
+    uint4 ballot = WaveActiveBallot(1);
+    _participant_bit[(temp + 1)] = ballot.x;
+    _participant_bit[(temp + 2)] = ballot.y;
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 18
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [576, 1, 0, 1792, 2, 0, 1808, 2, 0, 1824, 2, 0, 2112, 4, 0, 2560, 8, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267699989304141_876_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267699989304141_876_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..40e70f9d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267699989304141_876_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,245 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((93 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if 
((WaveGetLaneIndex() < 1)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + 
WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 63 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2192, 9, 0, 2192, 9, 0, 2208, 9, 0, 2208, 9, 0, 3344, 9, 0, 3344, 9, 0, 3360, 9, 0, 3360, 9, 0, 5972, 2, 0, 5976, 2, 0, 5980, 2, 0, 5988, 2, 0, 5992, 2, 0, 5996, 2, 0, 6848, 4, 0, 8384, 1, 0, 9808, 1, 0, 9824, 1, 0, 10496, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267700089970170_877_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267700089970170_877_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2a8ec82a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267700089970170_877_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,429 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of 3-word records: tag word, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] = next free word of _participant_bit, advanced by 3 per record */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + 
if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((108 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((130 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result 
= (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((196 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((207 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((285 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((295 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((310 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((325 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 
3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((351 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((366 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((381 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (391 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((407 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint 
i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((422 << 6) | (counter6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((429 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (438 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (449 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (458 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1488, 
1, 0, 3856, 1, 0, 13252, 4, 0, 13268, 4, 0, 18896, 8, 0, 18912, 8, 0, 18928, 8, 0, 19856, 8, 0, 19872, 8, 0, 19888, 8, 0, 25024, 4, 0, 28736, 1, 0, 29312, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267700216902631_878_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267700216902631_878_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6ebed3f8 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267700216902631_878_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,632 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of 3-word records: tag word, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] = next free word of _participant_bit, advanced by 3 per record */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((169 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((267 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((280 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(9)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + break; + } + case 1: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((325 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((342 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((353 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 2)) { + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (385 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (400 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (409 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (414 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (432 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 
0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (461 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((477 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((495 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((506 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((517 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (526 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((544 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((555 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (564 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (573 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (578 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if 
((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (588 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((604 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((614 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((625 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((636 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((645 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 9664, 1, 0, 9680, 1, 0, 9696, 1, 0, 12032, 7, 0, 12032, 7, 0, 12032, 7, 0, 13184, 8, 0, 15040, 9, 0, 15040, 9, 0, 22608, 2, 0, 22624, 2, 0, 29504, 1, 0, 32400, 1, 0, 32416, 1, 0, 33104, 1, 0, 33120, 1, 0, 36672, 1, 0, 36992, 7, 0, 36992, 7, 0, 36992, 7, 0, 37632, 8, 0, 38672, 4, 0, 38688, 4, 0, 40016, 8, 0, 40032, 8, 0, 40720, 8, 0, 40736, 8, 0, 41296, 8, 0, 41312, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267700569739493_881_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267700569739493_881_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e8995f1d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267700569739493_881_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,213 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1488, 9, 0, 1488, 9, 0, 1504, 9, 0, 1504, 9, 0, 3840, 2, 0, 6848, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267700633452906_882_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267700633452906_882_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c39f5e3d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267700633452906_882_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,303 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + 
result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((85 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((95 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (((104 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((119 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + for (uint i3 = 0; (i3 < 2); i3 
= (i3 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1408, 8, 0, 4240, 2, 0, 4256, 2, 0, 4272, 2, 0, 7632, 2, 0, 7636, 2, 0, 7640, 2, 0, 7648, 2, 0, 7652, 2, 0, 7656, 2, 0, 7664, 2, 0, 7668, 2, 0, 7672, 2, 0, 8128, 4, 0, 15488, 8, 0, 15504, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267700787795717_883_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267700787795717_883_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..57d9da2f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267700787795717_883_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,120 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 2688, 1, 0, 2704, 1, 0, 3392, 1, 0, 3408, 1, 0, 4480, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267700835395795_884_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267700835395795_884_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2ed2eedc --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267700835395795_884_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,287 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 
1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((129 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((140 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (266 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 111 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1408, 10, 0, 1408, 10, 0, 1424, 10, 0, 1424, 10, 0, 1440, 10, 0, 1440, 10, 0, 2560, 4, 0, 2576, 4, 0, 2592, 4, 0, 3200, 4, 0, 3216, 4, 0, 3232, 4, 0, 4484, 2, 0, 4488, 2, 0, 4492, 2, 0, 4500, 2, 0, 4504, 2, 0, 4508, 2, 0, 4516, 2, 0, 4520, 2, 0, 4524, 2, 0, 5184, 9, 0, 5184, 9, 0, 5200, 9, 0, 5200, 9, 0, 5216, 9, 0, 5216, 9, 0, 11008, 9, 0, 11008, 9, 0, 12544, 1, 0, 12992, 1, 0, 13696, 5, 0, 13696, 5, 0, 14400, 13, 0, 14400, 13, 0, 14400, 13, 0, 17856, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267700966835294_885_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267700966835294_885_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7f15eb6c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267700966835294_885_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,138 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 2432, 4, 0, 3840, 1, 0, 4752, 2, 0, 4768, 2, 0, 4784, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267701126109568_887_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267701126109568_887_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..aff96187 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267701126109568_887_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,147 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 
576, 9, 0, 2112, 4, 0, 2128, 4, 0, 2144, 4, 0, 3264, 4, 0, 3268, 4, 0, 3272, 4, 0, 3280, 4, 0, 3284, 4, 0, 3288, 4, 0, 3296, 4, 0, 3300, 4, 0, 3304, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267701199480703_888_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267701199480703_888_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e8809de4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267701199480703_888_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,267 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if 
(((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i0 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((212 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((222 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((231 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((235 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((246 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 135 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 10, 0, 1600, 10, 0, 5248, 10, 0, 5248, 10, 0, 7808, 1, 0, 11136, 5, 0, 11136, 5, 0, 10752, 2, 0, 9984, 8, 0, 12352, 6, 0, 12352, 6, 0, 12368, 6, 0, 12368, 6, 0, 13568, 5, 0, 13568, 5, 0, 13572, 5, 0, 13572, 5, 0, 13584, 5, 0, 13584, 5, 0, 13588, 5, 0, 13588, 5, 0, 14208, 5, 0, 14208, 5, 0, 14212, 5, 0, 14212, 5, 0, 14224, 5, 0, 14224, 5, 0, 14228, 5, 0, 14228, 5, 0, 14784, 5, 0, 14784, 5, 0, 14788, 5, 0, 14788, 5, 0, 14800, 5, 0, 14800, 5, 0, 14804, 5, 0, 14804, 5, 0, 15744, 5, 0, 15744, 5, 0, 15748, 5, 0, 15748, 5, 0, 15760, 5, 0, 15760, 5, 0, 15764, 5, 0, 15764, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267701521187615_889_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267701521187615_889_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1a10eab9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267701521187615_889_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,161 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 
0 + Size: 3 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267701581200411_890_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267701581200411_890_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d1061397 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267701581200411_890_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,191 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((95 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 3024, 4, 0, 3028, 4, 0, 3040, 4, 0, 3044, 4, 0, 3056, 4, 0, 3060, 4, 0, 3600, 4, 0, 3616, 4, 0, 3632, 4, 0, 6528, 8, 0, 7168, 5, 0, 7168, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267701656730813_891_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267701656730813_891_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1206377f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267701656730813_891_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,378 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 
1))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 
2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (307 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || 
(WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((335 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (342 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (359 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 8, 0, 3264, 8, 0, 3280, 8, 0, 4992, 8, 0, 5632, 1, 0, 10624, 8, 0, 11728, 1, 0, 11744, 1, 0, 12880, 9, 0, 12880, 9, 0, 12896, 9, 0, 12896, 9, 0, 15312, 8, 0, 15328, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267701736316982_892_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267701736316982_892_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f59bdb09 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267701736316982_892_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,253 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } 
+ case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } else { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((187 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((194 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2624, 8, 0, 4800, 4, 0, 9024, 4, 0, 9040, 4, 0, 9056, 4, 0, 9984, 4, 0, 11072, 10, 0, 11072, 10, 0, 11088, 10, 0, 11088, 10, 0, 11104, 10, 0, 11104, 10, 0, 12992, 10, 0, 12992, 10, 0, 13008, 10, 0, 13008, 10, 0, 13024, 10, 0, 13024, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267701842792861_893_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267701842792861_893_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..24576dea --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267701842792861_893_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,440 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((122 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((127 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((134 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((138 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((153 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((279 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((290 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (303 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (330 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((352 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((362 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((369 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((376 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (387 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 57 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1360, 1, 0, 6596, 2, 0, 6600, 2, 0, 6612, 2, 0, 6616, 2, 0, 6628, 2, 0, 6632, 2, 0, 8132, 2, 0, 8136, 2, 0, 8148, 2, 0, 8152, 2, 0, 8164, 2, 0, 8168, 2, 0, 9796, 2, 0, 9800, 2, 0, 9812, 2, 0, 9816, 2, 0, 9828, 2, 0, 9832, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 
+ - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267701967425672_894_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267701967425672_894_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4f13485d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267701967425672_894_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,174 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1616, 2, 0, 1632, 2, 0, 1648, 2, 0, 6528, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267702071081135_896_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267702071081135_896_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5683b2ad --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267702071081135_896_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,280 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Record log: three uints per record (identifier word, ballot.x, ballot.y). */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the write cursor into _participant_bit; each record advances it by 3. */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Every guarded block runs one wave intrinsic, reserves three slots with InterlockedAdd, then stores (id << 6) OR-ed with any enclosing loop counters (shifted by 4 and 2), followed by the x and y components of WaveActiveBallot(1). */ + uint result = 0; + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { /* Unreachable: lane 0 and lane 1 took earlier branches and every other lane index satisfies the previous condition. */ + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((83 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { /* Never true here: this scope is already restricted to lane 1. */ + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((243 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || 
(WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 117 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2240, 1, 0, 1984, 2, 0, 1728, 12, 0, 1728, 12, 0, 3856, 9, 0, 3856, 9, 0, 3872, 9, 0, 3872, 9, 0, 5328, 9, 0, 5328, 9, 0, 5332, 9, 0, 5332, 9, 0, 5344, 9, 0, 5344, 9, 0, 5348, 9, 0, 5348, 9, 0, 6352, 9, 0, 6352, 9, 0, 6368, 9, 0, 6368, 9, 0, 7872, 4, 0, 7888, 4, 0, 11904, 2, 0, 13184, 13, 0, 13184, 13, 0, 13184, 13, 0, 14336, 9, 0, 14336, 9, 0, 15552, 9, 0, 15552, 9, 0, 15568, 9, 0, 15568, 9, 0, 15584, 9, 0, 15584, 9, 0, 16640, 9, 0, 16640, 9, 0, 17344, 13, 0, 17344, 13, 0, 17344, 13, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267702217586882_897_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267702217586882_897_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b6e6ac96 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267702217586882_897_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,232 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Record log: three uints per record (identifier word, ballot.x, ballot.y). */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the write cursor into _participant_bit; each record advances it by 3. */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Every guarded block runs one wave intrinsic, reserves three slots with InterlockedAdd, then stores (id << 6) OR-ed with any enclosing loop counters (shifted by 4 and 2), followed by the x and y components of WaveActiveBallot(1). */ + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { /* The two nested switches reuse this selector, so only their case 0 arms can be taken from here. */ + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } /* Outer case 0 ends without a break, so its lanes continue into case 1 below. */ + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((145 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [960, 9, 0, 960, 9, 0, 6608, 9, 0, 6608, 9, 0, 6624, 9, 0, 6624, 9, 0, 7440, 9, 0, 7440, 9, 0, 7456, 9, 0, 7456, 9, 0, 10192, 1, 0, 10208, 1, 0, 10688, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267702330049705_898_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267702330049705_898_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a94afe59 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267702330049705_898_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,274 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Record log: three uints per record (identifier word, ballot.x, ballot.y). */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the write cursor into _participant_bit; each record advances it by 3. */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Every guarded block runs one wave intrinsic, reserves three slots with InterlockedAdd, then stores (id << 6) OR-ed with any enclosing loop counters (shifted by 4 and 2), followed by the x and y components of WaveActiveBallot(1). */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 
1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + 
counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((98 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } /* case 2 ends without a break, so control continues into case 3 below. */ + case 3: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((198 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((209 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 2)) { + break; + } + } + if ((i3 == 1)) { + break; + } + } + } else { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (i5 
<< 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1536, 2, 0, 1552, 2, 0, 3392, 2, 0, 3408, 2, 0, 7056, 4, 0, 7072, 4, 0, 7088, 4, 0, 7680, 8, 0, 9472, 8, 0, 10112, 5, 0, 10112, 5, 0, 15424, 8, 0, 15440, 8, 0, 15456, 8, 0, 16128, 8, 0, 16144, 8, 0, 16160, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267702658575304_900_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267702658575304_900_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b9a29f61 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267702658575304_900_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Record log: three uints per record (identifier word, ballot.x, ballot.y). */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the write cursor into _participant_bit; each record advances it by 3. */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Every guarded block runs one wave intrinsic, reserves three slots with InterlockedAdd, then stores (id << 6) followed by the x and y components of WaveActiveBallot(1). */ + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { /* Only lane 1 fails the first condition, so only lane 1 evaluates the nested chain below. */ + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3712, 13, 0, 3712, 13, 0, 3712, 13, 0, 2304, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267702705136470_901_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267702705136470_901_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..47399103 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267702705136470_901_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,286 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((185 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((194 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 8, 0, 704, 5, 0, 704, 5, 0, 2368, 13, 0, 2368, 13, 0, 2368, 13, 0, 3728, 1, 0, 3744, 1, 0, 3760, 1, 0, 4628, 1, 0, 4644, 1, 0, 4660, 1, 0, 11840, 2, 0, 11844, 2, 0, 11856, 2, 0, 11860, 2, 0, 11872, 2, 0, 11876, 2, 0, 14144, 4, 0, 14592, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267702810893341_902_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267702810893341_902_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ebf0edd8 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267702810893341_902_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: every wave-op site stores 3 uints here (tag, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the shared write cursor; InterlockedAdd bumps it by 3 per record */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* tid is never read; every branch below keys off WaveGetLaneIndex() */ + uint result = 0; + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { /* unreachable: the enclosing branch admits only lane 2, so op 9 never records */ + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((30 << 6) | (counter0 << 4)) | (counter1 << 2)); /* tag: op id shifted left by 6, outer counter in bits 5:4, inner counter in bits 3:2 */ + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 2)) { + break; + } + 
} + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1940, 4, 0, 1944, 4, 0, 1948, 4, 0, 1956, 4, 0, 1960, 4, 0, 1964, 4, 0, 3008, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267702876452275_903_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267702876452275_903_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..732b82bc --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267702876452275_903_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,644 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: every wave-op site stores 3 uints here (tag, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the shared write cursor; InterlockedAdd bumps it by 3 per record */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* tid is never read; every branch below keys off WaveGetLaneIndex() */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 
3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 1)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + 
WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((274 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (301 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (311 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (315 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (326 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (339 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(4)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((358 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((380 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i5 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (396 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (417 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((433 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((442 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (452 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (461 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (465 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (474 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (486 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (496 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (505 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (510 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (517 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (524 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (529 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 6784, 8, 0, 28304, 8, 0, 28320, 8, 0, 28336, 8, 0, 28928, 1, 0, 30336, 1, 0, 33856, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267703066489561_905_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267703066489561_905_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2bd9ad82 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267703066489561_905_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,153 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: every wave-op site stores 3 uints here (tag, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the shared write cursor; InterlockedAdd bumps it by 3 per record */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* tid is never read; every branch below keys off WaveGetLaneIndex() */ + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } /* no break: lanes leaving case 0 fall through into case 1 */ + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if 
(((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # 
Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 2448, 1, 0, 2464, 1, 0, 2752, 4, 0, 5264, 1, 0, 5280, 1, 0, 5296, 1, 0, 6416, 1, 0, 6432, 1, 0, 6448, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267703129179300_906_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267703129179300_906_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..66e569e3 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267703129179300_906_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,308 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: every wave-op site stores 3 uints here (tag, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the shared write cursor; InterlockedAdd bumps it by 3 per record */ + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) 
{ + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + 
result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1728, 7, 0, 1728, 7, 0, 1728, 7, 0, 1344, 8, 0, 8400, 1, 0, 8416, 1, 0, 8432, 1, 0, 9104, 1, 0, 9120, 1, 0, 9136, 1, 0, 9984, 4, 0, 14272, 1, 0, 14912, 5, 0, 14912, 5, 0, 15488, 5, 0, 15488, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - 
Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267703208681320_907_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267703208681320_907_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fe51c0ed --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267703208681320_907_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,266 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((105 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((114 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 9, 0, 1088, 9, 0, 2896, 1, 0, 2912, 1, 0, 2928, 1, 0, 3536, 1, 0, 3552, 1, 0, 3568, 1, 0, 7316, 8, 0, 7332, 8, 0, 7348, 8, 0, 8656, 9, 0, 8656, 9, 0, 8672, 9, 0, 8672, 9, 0, 8688, 9, 0, 8688, 9, 0, 9472, 9, 0, 9472, 9, 0, 12480, 8, 0, 12496, 8, 0, 13056, 2, 0, 13072, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267703361277644_908_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267703361277644_908_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..13771e41 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267703361277644_908_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,450 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + 
break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (i2 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((268 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((278 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((296 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((311 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((325 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((336 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((351 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((358 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (370 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 2640, 1, 0, 2656, 1, 0, 3216, 1, 0, 3232, 1, 0, 9344, 4, 0, 13312, 8, 0, 14720, 8, 0, 14736, 8, 0, 14752, 8, 0, 15616, 4, 0, 17168, 5, 0, 17168, 5, 0, 17184, 5, 0, 17184, 5, 0, 17808, 5, 0, 17808, 5, 0, 17824, 5, 0, 17824, 5, 0, 18384, 5, 0, 18384, 5, 0, 18400, 5, 0, 18400, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: 
_wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267703520419312_909_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267703520419312_909_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..df755721 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267703520419312_909_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,116 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((76 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 153 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 13, 0, 1088, 13, 0, 1088, 13, 0, 2048, 8, 0, 2064, 8, 0, 2080, 8, 0, 3712, 9, 0, 3712, 9, 0, 3716, 9, 0, 3716, 9, 0, 3720, 9, 0, 3720, 9, 0, 3728, 9, 0, 3728, 9, 0, 3732, 9, 0, 3732, 9, 0, 3736, 9, 0, 3736, 9, 0, 3744, 9, 0, 3744, 9, 0, 3748, 9, 0, 3748, 9, 0, 3752, 9, 0, 3752, 9, 0, 4416, 9, 0, 4416, 9, 0, 4420, 9, 0, 4420, 9, 0, 4424, 9, 0, 4424, 9, 0, 4432, 9, 0, 4432, 9, 0, 4436, 9, 0, 4436, 9, 0, 4440, 9, 0, 4440, 9, 0, 4448, 9, 0, 4448, 9, 0, 4452, 9, 0, 4452, 9, 0, 4456, 9, 0, 4456, 9, 0, 4864, 1, 0, 4868, 1, 0, 4872, 1, 0, 4880, 1, 0, 4884, 1, 0, 4888, 1, 0, 4896, 1, 0, 4900, 1, 0, 4904, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267703964007604_910_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267703964007604_910_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..63d523ac --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267703964007604_910_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,132 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 1664, 4, 0, 3904, 1, 0, 4864, 5, 0, 4864, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 
0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267704025161569_911_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267704025161569_911_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3683a764 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267704025161569_911_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,231 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + 
WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((187 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((194 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((201 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1728, 8, 0, 2816, 10, 0, 2816, 10, 0, 3968, 8, 0, 5504, 8, 0, 14016, 9, 0, 14016, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267704077923442_912_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267704077923442_912_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..af47cade --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267704077923442_912_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,126 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2176, 5, 0, 2176, 5, 0, 2816, 9, 0, 2816, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267704147548050_913_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267704147548050_913_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..090c92b4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267704147548050_913_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,120 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || 
(WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1808, 4, 0, 3344, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267704205710016_914_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267704205710016_914_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6372c5f1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267704205710016_914_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,432 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 
1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) 
{ + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((218 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 2)) { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((266 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((301 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((319 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((326 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (340 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (347 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((369 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((387 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((397 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((406 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((421 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [9152, 4, 0, 9600, 8, 0, 10496, 1, 0, 22208, 8, 0, 23632, 9, 0, 23632, 9, 0, 23648, 9, 0, 23648, 9, 0, 23664, 9, 0, 23664, 9, 0, 24784, 9, 0, 24784, 9, 0, 24800, 9, 0, 24800, 9, 0, 24816, 9, 0, 24816, 9, 0, 25424, 8, 0, 25440, 8, 0, 25456, 8, 0, 26960, 13, 0, 26960, 13, 0, 26960, 13, 0, 26976, 13, 0, 26976, 13, 0, 26976, 13, 0, 26992, 13, 0, 26992, 13, 0, 26992, 13, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 
0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267704473109955_916_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267704473109955_916_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0f014885 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267704473109955_916_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,249 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 1, 0, 2432, 1, 0, 2752, 1, 0, 5632, 1, 0, 5648, 1, 0, 5664, 1, 0, 5952, 9, 0, 5952, 9, 0, 9600, 10, 0, 9600, 10, 0, 9344, 5, 0, 9344, 5, 0, 10240, 5, 0, 10240, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267704565962996_917_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267704565962996_917_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b8809815 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267704565962996_917_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,150 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: 
UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 9, 0, 1088, 9, 0, 4176, 6, 0, 4176, 6, 0, 4192, 6, 0, 4192, 6, 0, 4208, 6, 0, 4208, 6, 0, 6464, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267704623945749_918_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267704623945749_918_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ba2aaf39 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267704623945749_918_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,117 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1472, 4, 0, 1920, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267704669383107_919_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267704669383107_919_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9942f939 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267704669383107_919_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,228 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((181 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 8448, 2, 0, 11604, 4, 0, 11608, 4, 0, 11612, 4, 0, 11620, 4, 0, 11624, 4, 0, 11628, 4, 0, 12304, 4, 0, 12320, 4, 0, 13008, 4, 0, 13024, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267704845714739_921_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267704845714739_921_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d5e12a1d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267704845714739_921_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 
+ Stride: 4 + Data: [1664, 12, 0, 1664, 12, 0, 1280, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267704968676985_923_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267704968676985_923_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cabdd67a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267704968676985_923_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (i0 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2048, 2, 0, 2064, 2, 0, 2080, 2, 0, 2948, 8, 0, 2952, 8, 0, 2956, 8, 0, 2964, 8, 0, 2968, 8, 0, 2972, 8, 0, 2980, 8, 0, 2984, 8, 0, 2988, 8, 0, 3396, 8, 0, 3400, 8, 0, 3404, 8, 0, 3412, 8, 0, 3416, 8, 0, 3420, 8, 0, 3428, 8, 0, 3432, 8, 0, 3436, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267705055585352_924_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267705055585352_924_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a3f5fda2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267705055585352_924_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,122 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1216, 9, 0, 1216, 9, 0, 2624, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267705225698255_926_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267705225698255_926_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f70cf98a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267705225698255_926_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,414 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + 
if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((241 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (292 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((331 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (338 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (342 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 5184, 8, 0, 6272, 5, 0, 6272, 5, 0, 12864, 2, 0, 14224, 8, 0, 14240, 8, 0, 14256, 
8, 0, 15888, 8, 0, 15904, 8, 0, 15920, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267705442820283_929_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267705442820283_929_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..da72aa98 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267705442820283_929_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,262 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((129 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((140 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((144 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((179 << 6) | (counter2 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((188 << 6) | (counter2 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((212 << 6) | 
(counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6160, 8, 0, 6176, 8, 0, 6192, 8, 0, 7120, 5, 0, 7120, 5, 0, 7136, 5, 0, 7136, 5, 0, 7152, 5, 0, 7152, 5, 0, 10192, 2, 0, 10208, 2, 0, 10224, 2, 0, 13136, 8, 0, 13152, 8, 0, 13168, 8, 0, 13584, 2, 0, 13600, 2, 0, 13616, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267705530163949_930_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267705530163949_930_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e65d2ace --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267705530163949_930_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,439 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = 
(result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((273 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (301 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (311 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((327 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((336 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (347 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (373 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (391 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((409 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (425 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3584, 3, 0, 3584, 3, 0, 3200, 8, 0, 2176, 4, 0, 4416, 1, 0, 17920, 8, 0, 19264, 1, 0, 20944, 1, 0, 20960, 1, 0, 21520, 1, 0, 21536, 1, 0, 22208, 1, 0, 23872, 8, 0, 25024, 8, 0, 26192, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267705614088183_931_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267705614088183_931_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8c7e5a39 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267705614088183_931_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,216 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - 
Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 2432, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267705662837470_932_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267705662837470_932_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..96390448 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267705662837470_932_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,434 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(7)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint 
i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((122 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((183 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = 
(result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((192 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + } + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((271 << 6) | (i6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 2)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((285 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i6 == 1)) { + break; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (324 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i8 = 0; (i8 < 3); i8 = (i8 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((345 << 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((358 
<< 6) | (i8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i8 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (378 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (405 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (419 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (430 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (441 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (445 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (462 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter9 = 0; + while ((counter9 < 3)) { + counter9 = (counter9 + 1); + for (uint i10 = 0; (i10 < 2); i10 = (i10 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((484 << 6) | (counter9 << 4)) | (i10 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((494 << 6) | (counter9 << 4)) | (i10 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if 
((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((511 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 3268, 2, 0, 3272, 2, 0, 3284, 2, 0, 3288, 2, 0, 3300, 2, 0, 3304, 2, 0, 5632, 6, 0, 5632, 6, 0, 7808, 2, 0, 7812, 2, 0, 7824, 2, 0, 7828, 2, 0, 16256, 1, 0, 16272, 1, 0, 17344, 1, 0, 17348, 1, 0, 17352, 1, 0, 17360, 1, 0, 17364, 1, 0, 17368, 1, 0, 18240, 2, 0, 18256, 2, 0, 19072, 9, 0, 19072, 9, 0, 20736, 2, 0, 22912, 2, 0, 22928, 2, 0, 22944, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267706237068896_933_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267706237068896_933_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..eac5b9e8 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267706237068896_933_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || 
(WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1152, 5, 0, 1152, 5, 0, 2576, 11, 0, 2576, 11, 0, 2576, 11, 0, 2592, 11, 0, 2592, 11, 0, 2592, 11, 0, 3536, 6, 0, 3536, 6, 0, 3552, 6, 0, 3552, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267706377663231_935_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267706377663231_935_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ef652839 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267706377663231_935_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,202 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2496, 9, 0, 2496, 9, 0, 2240, 4, 0, 1984, 2, 0, 5824, 4, 0, 8448, 9, 0, 8448, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267706433041347_936_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267706433041347_936_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..54e5da9c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267706433041347_936_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,443 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 
4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((111 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((128 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((137 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((221 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((230 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 1)) { + break; + } + } + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (311 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (335 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (344 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (348 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (359 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = 
(counter5 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((381 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 3200, 1, 0, 3216, 1, 0, 4992, 4, 0, 5008, 4, 0, 6528, 4, 0, 6544, 4, 0, 7104, 1, 0, 7120, 1, 0, 8192, 1, 0, 8196, 1, 0, 8200, 1, 0, 8208, 1, 0, 8212, 1, 0, 8216, 1, 0, 8768, 1, 0, 8772, 1, 0, 8776, 1, 0, 8784, 1, 0, 8788, 1, 0, 8792, 1, 0, 12544, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267706636981777_937_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267706636981777_937_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..60cee3fb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267706636981777_937_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,126 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || 
(WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4736, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267706700539219_938_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267706700539219_938_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..abc8e141 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267706700539219_938_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267706759799567_939_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267706759799567_939_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dc602e67 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267706759799567_939_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,131 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: 
[1104, 13, 0, 1104, 13, 0, 1104, 13, 0, 1120, 13, 0, 1120, 13, 0, 1120, 13, 0, 2256, 9, 0, 2256, 9, 0, 2272, 9, 0, 2272, 9, 0, 2896, 9, 0, 2896, 9, 0, 2912, 9, 0, 2912, 9, 0, 4496, 9, 0, 4496, 9, 0, 4512, 9, 0, 4512, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267706824933274_940_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267706824933274_940_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..00372b33 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267706824933274_940_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,260 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result 
+ WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((157 << 6) | (counter2 << 4)) | 
(counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1104, 13, 0, 1104, 13, 0, 1104, 13, 0, 1120, 13, 0, 1120, 13, 0, 1120, 13, 0, 1872, 2, 0, 1888, 2, 0, 4688, 13, 0, 4688, 13, 0, 4688, 13, 0, 4704, 13, 0, 4704, 13, 0, 4704, 13, 0, 7808, 4, 0, 7552, 9, 0, 7552, 9, 0, 6912, 2, 0, 10068, 1, 0, 10072, 1, 0, 10084, 1, 0, 10088, 1, 0, 10100, 1, 0, 10104, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267706962925565_941_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267706962925565_941_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..125cb73d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267706962925565_941_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,225 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((114 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml 
+--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 4032, 4, 0, 4672, 9, 0, 4672, 9, 0, 9344, 2, 0, 9360, 2, 0, 10048, 2, 0, 10064, 2, 0, 10944, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267707026831394_942_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267707026831394_942_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..78677220 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267707026831394_942_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,108 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 9, 0, 1088, 9, 0, 2240, 9, 0, 2240, 9, 0, 4032, 9, 0, 4032, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267707073498695_943_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267707073498695_943_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2a9f55d2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267707073498695_943_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,140 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + 
Stride: 4 + Data: [576, 5, 0, 576, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267707127366672_944_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267707127366672_944_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d71d15bc --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267707127366672_944_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,208 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: 
+ - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1936, 5, 0, 1936, 5, 0, 1952, 5, 0, 1952, 5, 0, 3472, 5, 0, 3472, 5, 0, 3488, 5, 0, 3488, 5, 0, 5904, 8, 0, 5920, 8, 0, 7184, 5, 0, 7184, 5, 0, 7200, 5, 0, 7200, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267707297250367_946_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267707297250367_946_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ccff4533 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267707297250367_946_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,244 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 2048, 9, 0, 2048, 9, 0, 2624, 1, 0, 2944, 15, 0, 2944, 15, 0, 2944, 15, 0, 2944, 15, 0, 4224, 8, 0, 7040, 1, 0, 10752, 4, 0, 10768, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267707425476353_948_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267707425476353_948_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e05d5208 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267707425476353_948_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,183 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 3)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1408, 9, 0, 1408, 9, 0, 2704, 8, 0, 2720, 8, 0, 2736, 8, 0, 3408, 8, 0, 3424, 8, 0, 3440, 8, 0, 5248, 9, 0, 5248, 9, 0, 8576, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 
1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267707499018719_949_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267707499018719_949_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..58b49ba0 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267707499018719_949_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,331 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((154 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((161 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((170 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (313 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2240, 1, 0, 2944, 9, 0, 2944, 9, 0, 
10880, 2, 0, 10884, 2, 0, 10896, 2, 0, 10900, 2, 0, 13120, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267707649543469_951_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267707649543469_951_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8817d1c0 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267707649543469_951_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,209 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((87 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((94 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((107 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + } + break; + } + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((148 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((162 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((181 << 6) | (counter4 << 4)) | (counter5 << 2)) | i6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((190 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 201 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 9, 0, 1296, 9, 0, 1312, 9, 0, 1312, 9, 0, 8464, 7, 0, 8464, 7, 0, 8464, 7, 0, 8480, 7, 0, 8480, 7, 0, 8480, 7, 0, 8496, 7, 0, 8496, 7, 0, 8496, 7, 0, 9492, 8, 0, 9496, 8, 0, 9500, 8, 0, 9508, 8, 0, 9512, 8, 0, 9516, 8, 0, 9524, 8, 0, 9528, 8, 0, 9532, 8, 0, 10388, 5, 0, 10388, 5, 0, 10392, 5, 0, 10392, 5, 0, 10396, 5, 0, 10396, 5, 0, 10404, 5, 0, 10404, 5, 0, 10408, 5, 0, 10408, 5, 0, 10412, 5, 0, 10412, 5, 0, 10420, 5, 0, 10420, 5, 0, 10424, 5, 0, 10424, 5, 0, 10428, 5, 0, 10428, 5, 0, 11604, 1, 0, 11605, 1, 0, 11606, 1, 0, 11608, 1, 0, 11609, 1, 0, 11610, 1, 0, 11612, 1, 0, 11613, 1, 0, 11614, 1, 0, 11620, 1, 0, 11621, 1, 0, 11622, 1, 0, 11624, 1, 0, 11625, 1, 0, 11626, 1, 0, 11628, 1, 0, 11629, 1, 0, 11630, 1, 0, 11636, 1, 0, 11637, 1, 0, 11638, 1, 0, 11640, 1, 0, 11641, 1, 0, 11642, 1, 0, 11644, 1, 0, 11645, 1, 0, 11646, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267708673436890_952_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267708673436890_952_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c9a205a2 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267708673436890_952_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,294 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result 
= (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((90 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((101 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((174 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + if 
((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1168, 1, 0, 1184, 1, 0, 1744, 1, 0, 1760, 1, 0, 2624, 4, 0, 3520, 1, 0, 7168, 2, 0, 7184, 2, 0, 12880, 4, 0, 12896, 4, 0, 12912, 4, 0, 13312, 12, 0, 13312, 12, 0, 13952, 9, 0, 13952, 9, 0, 14848, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267708845754209_954_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267708845754209_954_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3de1e3fd --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267708845754209_954_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + 
- Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267709041825449_956_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267709041825449_956_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..66e786f1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267709041825449_956_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,233 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) 
| (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 1)) { 
+ result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 12, 0, 576, 12, 0, 5632, 1, 0, 10880, 2, 0, 11792, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267709113606849_957_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267709113606849_957_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..28cc4dfc --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267709113606849_957_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,267 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { 
+ result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) 
== 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 7424, 4, 0, 8064, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: 
+ Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267709337587045_960_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267709337587045_960_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e7d5a9e9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267709337587045_960_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,210 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + 
WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1152, 1, 0, 3716, 4, 0, 3720, 4, 0, 3732, 4, 0, 3736, 4, 0, 4736, 9, 0, 4736, 9, 0, 8512, 4, 0, 8528, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: 
_wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267709414748733_961_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267709414748733_961_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..704c5dc8 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267709414748733_961_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,123 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 2)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(8)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 1, 0, 3152, 4, 0, 3156, 4, 0, 3160, 4, 0, 3168, 4, 0, 3172, 4, 0, 3176, 4, 0, 3184, 4, 0, 3188, 4, 0, 3192, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267709587077413_963_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267709587077413_963_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5f74e04c --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267709587077413_963_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,500 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((68 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (295 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (321 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((337 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (348 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((362 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (373 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (391 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (402 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (409 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (419 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (428 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (433 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (440 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (447 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1872, 1, 0, 1888, 1, 0, 1904, 1, 0, 9792, 4, 0, 10240, 8, 0, 11584, 2, 0, 14912, 1, 0, 16192, 2, 0, 20544, 6, 0, 20544, 6, 0, 21584, 1, 0, 21600, 1, 0, 25728, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267709699667098_964_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267709699667098_964_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..d1e5e371
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267709699667098_964_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,279 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-uint records {id word, ballot.x, ballot.y}, one per lane reaching a tracked wave op
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // [0] is the next free offset in _participant_bit; bumped by 3 per record
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // each guarded block below runs one wave op, then appends a record
+  uint result = 0;
+  if ((WaveGetLaneIndex() == 0)) {
+    if ((WaveGetLaneIndex() == 0)) {
+      result = (result + WaveActiveMin(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (9 << 6); // id word: op id shifted into bits 6 and up
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (19 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: {
+      switch ((WaveGetLaneIndex() % 3)) {
+      case 0: {
+        if ((WaveGetLaneIndex() < 8)) {
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (29 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 1: {
+        if (((WaveGetLaneIndex() % 2) == 0)) {
+          result = (result + WaveActiveSum(2));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (38 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 2: {
+        if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
+          if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) {
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (56 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) {
+            result = (result + WaveActiveMin(9));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (67 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        break;
+      }
+      }
+      break;
+    }
+    }
+    if ((WaveGetLaneIndex() == 3)) {
+      result = (result + WaveActiveMin(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (74 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+  if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) {
+    if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) {
+      result = (result + WaveActiveMax(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (92 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    switch ((WaveGetLaneIndex() % 4)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (102 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: {
+      for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) {
+        if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+          if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+            result = (result + WaveActiveMin(9));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((128 << 6) | (i0 << 4)); // loop counter OR'd into the id word at bit 4
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        } else {
+          if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) {
+            result = (result + WaveActiveMax(8));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((143 << 6) | (i0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((154 << 6) | (i0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    case 2: {
+      if (true) {
+        result = (result + WaveActiveSum(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (159 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 3: {
+      switch ((WaveGetLaneIndex() % 3)) {
+      case 0: {
+        if ((WaveGetLaneIndex() < 8)) {
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (169 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 1: {
+        if (((WaveGetLaneIndex() % 2) == 0)) {
+          result = (result + WaveActiveSum(2));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (178 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 2: {
+        uint counter1 = 0;
+        while ((counter1 < 2)) {
+          counter1 = (counter1 + 1);
+          if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+            result = (result + WaveActiveSum(4));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((196 << 6) | (counter1 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        break;
+      }
+      }
+      break;
+    }
+    default: {
+      result = (result + WaveActiveSum(99));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (200 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+      break;
+    }
+    }
+    if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) {
+      result = (result + WaveActiveMax(4));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (211 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 15
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [576, 1, 0, 1216, 1, 0, 6528, 1, 0, 10816, 8, 0, 13504, 1, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+      - Name: _participant_bit
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 0
+          Space: 0
+        VulkanBinding:
+          Binding: 0
+      - Name: _wave_op_index
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 1
+          Space: 0
+        VulkanBinding:
+          Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267709808967816_966_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267709808967816_966_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..dcb06649
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267709808967816_966_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,249 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-uint records {id word, ballot.x, ballot.y}, one per lane reaching a tracked wave op
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // [0] is the next free offset in _participant_bit; bumped by 3 per record
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // each guarded block below runs one wave op, then appends a record
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 2)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) {
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (9 << 6); // id word: op id shifted into bits 6 and up
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 1: {
+    switch ((WaveGetLaneIndex() % 3)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (19 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (28 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 2: {
+      for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) {
+        if ((WaveGetLaneIndex() < 1)) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((43 << 6) | (i0 << 4)); // loop counter OR'd into the id word at bit 4
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    }
+    break;
+  }
+  default: {
+    result = (result + WaveActiveSum(99));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp);
+    _participant_bit[temp] = (47 << 6);
+    uint4 ballot = WaveActiveBallot(1);
+    _participant_bit[(temp + 1)] = ballot.x;
+    _participant_bit[(temp + 2)] = ballot.y;
+    break;
+  }
+  }
+  switch ((WaveGetLaneIndex() % 4)) {
+  case 0: {
+    for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) {
+      uint counter2 = 0;
+      while ((counter2 < 2)) {
+        counter2 = (counter2 + 1);
+        if (((WaveGetLaneIndex() & 1) == 1)) {
+          result = (result + WaveActiveMin((WaveGetLaneIndex() + 4)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((76 << 6) | (i1 << 4)) | (counter2 << 2)); // outer counter at bit 4, inner counter at bit 2
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) {
+        result = (result + WaveActiveMin(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((87 << 6) | (i1 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+  } // no break: case 0 continues into case 1
+  case 1: {
+    if ((WaveGetLaneIndex() >= 3)) {
+      if ((WaveGetLaneIndex() >= 3)) {
+        result = (result + WaveActiveMax((WaveGetLaneIndex() + 2)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (99 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      switch ((WaveGetLaneIndex() % 2)) {
+      case 0: {
+        if ((WaveGetLaneIndex() < 8)) {
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (109 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 1: {
+        if (((WaveGetLaneIndex() % 2) == 0)) {
+          result = (result + WaveActiveSum(2));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (118 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      }
+    }
+    break;
+  }
+  case 2: {
+    if (true) {
+      result = (result + WaveActiveSum(3));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (123 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  } // no break: case 2 continues into case 3
+  case 3: {
+    if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) {
+      if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) {
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (141 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) {
+        if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+          result = (result + WaveActiveMin(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((160 << 6) | (i3 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) {
+        result = (result + WaveActiveMin(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (171 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    } else {
+      for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) {
+        if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) {
+          result = (result + WaveActiveMin(4));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((194 << 6) | (i4 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+    }
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 21
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [576, 5, 0, 576, 5, 0, 1216, 8, 0, 7872, 4, 0, 9024, 4, 0, 10240, 4, 0, 10256, 4, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+      - Name: _participant_bit
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 0
+          Space: 0
+        VulkanBinding:
+          Binding: 0
+      - Name: _wave_op_index
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 1
+          Space: 0
+        VulkanBinding:
+          Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267709876963941_967_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267709876963941_967_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..203a5608
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267709876963941_967_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,234 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-uint records {id word, ballot.x, ballot.y}, one per lane reaching a tracked wave op
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // [0] is the next free offset in _participant_bit; bumped by 3 per record
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // each guarded block below runs one wave op, then appends a record
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 4)) {
+  case 0: {
+    if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+      if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) {
+        result = (result + WaveActiveMin(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (20 << 6); // id word: op id shifted into bits 6 and up
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      uint counter0 = 0;
+      while ((counter0 < 3)) {
+        counter0 = (counter0 + 1);
+        if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) {
+          result = (result + WaveActiveSum((WaveGetLaneIndex() + 2)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); // loop counter OR'd into the id word at bit 4
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (51 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    } else {
+      switch ((WaveGetLaneIndex() % 2)) {
+      case 0: {
+        if ((WaveGetLaneIndex() < 8)) {
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (61 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 1: {
+        if (((WaveGetLaneIndex() % 2) == 0)) {
+          result = (result + WaveActiveSum(2));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (70 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      default: {
+        result = (result + WaveActiveSum(99));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (74 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+        break;
+      }
+      }
+      if ((WaveGetLaneIndex() >= 3)) {
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (81 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  case 1: {
+    if (((WaveGetLaneIndex() % 2) == 0)) {
+      result = (result + WaveActiveSum(2));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (90 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 2: {
+    if (true) {
+      result = (result + WaveActiveSum(3));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (95 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 3: {
+    if ((WaveGetLaneIndex() < 20)) {
+      result = (result + WaveActiveSum(4));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (102 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  default: {
+    result = (result + WaveActiveSum(99));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp);
+    _participant_bit[temp] = (106 << 6);
+    uint4 ballot = WaveActiveBallot(1);
+    _participant_bit[(temp + 1)] = ballot.x;
+    _participant_bit[(temp + 2)] = ballot.y;
+    break;
+  }
+  }
+  if (((WaveGetLaneIndex() & 1) == 0)) {
+    if (((WaveGetLaneIndex() & 1) == 1)) {
+      result = (result + WaveActiveSum(10));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (120 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (130 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (139 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    default: {
+      result = (result + WaveActiveSum(99));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (143 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+      break;
+    }
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 18
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1280, 1, 0, 3264, 1, 0, 6080, 4, 0, 6528, 8, 0, 8320, 5, 0, 8320, 5, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+      - Name: _participant_bit
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 0
+          Space: 0
+        VulkanBinding:
+          Binding: 0
+      - Name: _wave_op_index
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 1
+          Space: 0
+        VulkanBinding:
+          Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize4BitTracking/program_1756267709946522624_968_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267709946522624_968_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..0f6a9771
--- /dev/null
+++ b/test/WaveSize4BitTracking/program_1756267709946522624_968_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,205 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-uint records {id word, ballot.x, ballot.y}, one per lane reaching a tracked wave op
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // [0] is the next free offset in _participant_bit; bumped by 3 per record
+
+[numthreads(4, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // each guarded block below runs one wave op, then appends a record
+  uint result = 0;
+  if (((WaveGetLaneIndex() & 1) == 1)) {
+    result = (result + WaveActiveSum((WaveGetLaneIndex() + 1)));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp);
+    _participant_bit[temp] = (30 << 6); // id word: op id shifted into bits 6 and up
+    uint4 ballot = WaveActiveBallot(1);
+    _participant_bit[(temp + 1)] = ballot.x;
+    _participant_bit[(temp + 2)] = ballot.y;
+  } else {
+    if ((WaveGetLaneIndex() < 1)) {
+      result = (result + WaveActiveMin((WaveGetLaneIndex() + 2)));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (24 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    } else {
+      if (((WaveGetLaneIndex() & 1) == 0)) {
+        result = (result + WaveActiveMax((WaveGetLaneIndex() + 3)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (18 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+  }
+  switch ((WaveGetLaneIndex() % 4)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) {
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (40 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 1: {
+    if (((WaveGetLaneIndex() % 2) == 0)) {
+      result = (result + WaveActiveSum(2));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (49 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 2: {
+    if (true) {
+      result = (result + WaveActiveSum(3));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (54 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 3: {
+    if ((WaveGetLaneIndex() < 20)) {
+      result = (result + WaveActiveSum(4));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (61 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  }
+  if ((WaveGetLaneIndex() == 0)) {
+    if ((WaveGetLaneIndex() == 0)) {
+      result = (result + WaveActiveMin(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (71 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    uint counter0 = 0;
+    while ((counter0 < 3)) {
+      counter0 = (counter0 + 1);
+      if ((WaveGetLaneIndex() < 2)) {
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((85 << 6) | (counter0 << 4)); // loop counter OR'd into the id word at bit 4
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      uint counter1 = 0;
+      while ((counter1 < 2)) {
+        counter1 = (counter1 + 1);
+        if ((WaveGetLaneIndex() >= 3)) {
+          result = (result + WaveActiveMin(5));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((99 << 6) | (counter0 << 4)) | (counter1 << 2)); // outer counter at bit 4, inner counter at bit 2
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((WaveGetLaneIndex() < 2)) {
+          if ((WaveGetLaneIndex() >= 3)) {
+            result = (result + WaveActiveMin((WaveGetLaneIndex() + 2)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((111 << 6) | (counter0 << 4)) | (counter1 << 2));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((WaveGetLaneIndex() >= 3)) {
+          result = (result + WaveActiveMax(6));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((118 << 6) | (counter0 << 4)) | (counter1 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((counter1 == 1)) {
+          break;
+        }
+      }
+    }
+    if ((WaveGetLaneIndex() == 2)) {
+      result = (result + WaveActiveMin(WaveGetLaneIndex()));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (128 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 4 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 33
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1920, 10, 0, 1920, 10, 0, 1536, 1, 0, 1152, 4, 0, 2560, 1, 0, 3456, 4, 0, 3904, 8, 0, 4544, 1, 0, 5456, 1, 0, 5472, 1, 0, 5488, 1, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+      - Name: _participant_bit
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 0
+          Space: 0
+        VulkanBinding:
+          Binding: 0
+      - Name: _wave_op_index
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 1
+          Space: 0
+        VulkanBinding:
+          Binding: 1
+...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267710012165275_969_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267710012165275_969_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7b403662 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267710012165275_969_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,178 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else 
{ + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1152, 1, 0, 1472, 15, 0, 1472, 15, 0, 1472, 15, 0, 1472, 15, 0, 2368, 5, 0, 2368, 5, 0, 5504, 9, 0, 5504, 9, 0, 5248, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267710065074536_970_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267710065074536_970_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1d0d6f18 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267710065074536_970_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,155 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + 
- Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 57 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 9, 0, 1296, 9, 0, 1312, 9, 0, 1312, 9, 0, 2960, 9, 0, 2960, 9, 0, 2964, 9, 0, 2964, 9, 0, 2976, 9, 0, 2976, 9, 0, 2980, 9, 0, 2980, 9, 0, 4736, 4, 0, 6656, 11, 0, 6656, 11, 0, 6656, 11, 0, 7616, 11, 0, 7616, 11, 0, 7616, 11, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267710150790591_971_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267710150790591_971_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..88a13d2d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267710150790591_971_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,228 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((17 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((102 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + 
break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() 
== 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1104, 9, 0, 1104, 9, 0, 1120, 9, 0, 1120, 9, 0, 2576, 6, 0, 2576, 6, 0, 2580, 6, 0, 2580, 6, 0, 2592, 6, 0, 2592, 6, 0, 2596, 6, 0, 2596, 6, 0, 3536, 13, 0, 3536, 13, 0, 3536, 13, 0, 3540, 13, 0, 3540, 13, 0, 3540, 13, 0, 3552, 13, 0, 3552, 13, 0, 3552, 13, 0, 3556, 13, 0, 3556, 13, 0, 3556, 13, 0, 6528, 1, 0, 6532, 1, 0, 6536, 1, 
0, 6544, 1, 0, 6548, 1, 0, 6552, 1, 0, 6560, 1, 0, 6564, 1, 0, 6568, 1, 0, 7424, 1, 0, 7440, 1, 0, 7456, 1, 0, 13440, 5, 0, 13440, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267710379598041_973_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267710379598041_973_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a0a6f9a9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267710379598041_973_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,228 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((24 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(3)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 
1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1552, 1, 0, 1556, 1, 0, 1568, 1, 0, 1572, 1, 0, 1584, 1, 0, 1588, 1, 0, 2000, 1, 0, 2004, 1, 0, 2016, 1, 0, 2020, 1, 0, 2032, 1, 0, 2036, 1, 0, 9408, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267710730092535_975_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267710730092535_975_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..16a739be --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267710730092535_975_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,206 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i2 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((167 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 
+ Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1728, 8, 0, 6464, 8, 0, 7936, 6, 0, 7936, 6, 0, 7952, 6, 0, 7952, 6, 0, 9280, 1, 0, 9296, 1, 0, 9312, 1, 0, 10688, 1, 0, 10692, 1, 0, 10704, 1, 0, 10708, 1, 0, 10720, 1, 0, 10724, 1, 0, 11328, 1, 0, 11344, 1, 0, 11360, 1, 0, 12544, 10, 0, 12544, 10, 0, 12560, 10, 0, 12560, 10, 0, 12576, 10, 0, 12576, 10, 0, 13696, 9, 0, 13696, 9, 0, 13712, 9, 0, 13712, 9, 0, 13728, 9, 0, 13728, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267710908182721_976_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267710908182721_976_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a83fcb9b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267710908182721_976_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + 
Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267710973962137_977_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267710973962137_977_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..add27d8f --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267710973962137_977_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,302 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2560, 6, 0, 2560, 6, 0, 1920, 9, 0, 1920, 9, 0, 3648, 5, 0, 3648, 5, 0, 5504, 1, 0, 7744, 9, 0, 7744, 9, 0, 15104, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + 
Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267711049247742_978_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267711049247742_978_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2f88a3f4 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267711049247742_978_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,517 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((83 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + if ((i0 == 2)) { + break; + } + } + } + case 1: { + if ((WaveGetLaneIndex() >= 3)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if 
((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((243 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((258 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((265 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((331 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (346 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (357 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((373 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (394 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (407 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (433 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (450 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((469 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((486 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i8 = 0; (i8 < 3); i8 = (i8 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() 
== 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((507 << 6) | (counter7 << 4)) | (i8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter7 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (519 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (529 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (536 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (554 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 63 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1728, 11, 0, 1728, 11, 0, 1728, 11, 0, 2880, 1, 0, 2896, 1, 0, 2912, 1, 0, 15552, 1, 0, 15568, 1, 0, 15584, 1, 0, 16512, 2, 0, 16516, 2, 0, 16528, 2, 0, 16532, 2, 0, 16544, 2, 0, 16548, 2, 0, 18624, 1, 0, 23888, 2, 0, 23904, 2, 0, 32464, 8, 0, 32468, 8, 0, 32472, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267711363505079_979_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267711363505079_979_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..78d02041 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267711363505079_979_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,135 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 4544, 4, 0, 4548, 4, 0, 4560, 4, 0, 4564, 4, 0, 5312, 5, 0, 5312, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267711427241956_980_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267711427241956_980_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..98a91fd1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267711427241956_980_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,197 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 
1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: 
[1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 9408, 10, 0, 9408, 10, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267711480983539_981_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267711480983539_981_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..54dc6d9b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267711480983539_981_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,403 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (286 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (295 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + 
Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 1472, 4, 0, 4864, 14, 0, 4864, 14, 0, 4864, 14, 0, 5504, 8, 0, 7696, 4, 0, 7712, 4, 0, 8576, 1, 0, 14144, 4, 0, 16384, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267711568396482_982_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267711568396482_982_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..49166deb --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267711568396482_982_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,206 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { 
+ result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter1 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 1, 0, 1360, 1, 0, 1376, 1, 0, 5904, 4, 0, 5920, 4, 0, 5936, 4, 0, 6336, 14, 0, 6336, 14, 0, 6336, 14, 0, 6976, 9, 0, 6976, 9, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267711652865578_983_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267711652865578_983_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..45e9ceb9 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267711652865578_983_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,234 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1152, 1, 0, 2816, 2, 0, 3776, 4, 0, 4736, 1, 0, 6272, 8, 0, 7168, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267711708764787_984_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267711708764787_984_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..29506a5a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267711708764787_984_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,241 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 
3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 57 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1728, 1, 0, 1744, 1, 0, 1760, 1, 0, 4288, 5, 0, 4288, 5, 0, 4304, 5, 0, 4304, 5, 0, 4320, 5, 0, 4320, 5, 0, 5952, 4, 0, 5968, 4, 0, 5984, 4, 0, 7360, 8, 0, 7376, 8, 0, 8512, 8, 0, 8528, 8, 0, 9152, 9, 0, 9152, 9, 0, 13248, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267711937176565_985_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267711937176565_985_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4413b6fc --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267711937176565_985_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,210 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- 
+Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 5, 0, 576, 5, 0, 1152, 5, 0, 1152, 5, 0, 2304, 9, 0, 2304, 9, 0, 2944, 8, 0, 3584, 8, 0, 5632, 9, 0, 5632, 9, 0, 6336, 2, 0, 8768, 6, 0, 8768, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267712006884212_986_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267712006884212_986_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0112bf5d --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267712006884212_986_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,303 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((132 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((142 << 6) | (counter0 << 
4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((151 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((166 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 57 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 1984, 1, 0, 6528, 4, 0, 8464, 8, 0, 8468, 8, 0, 8472, 8, 0, 8480, 8, 0, 8484, 8, 0, 8488, 8, 0, 8496, 8, 0, 8500, 8, 0, 8504, 8, 0, 11792, 8, 0, 11808, 8, 0, 11824, 8, 0, 12416, 1, 0, 14288, 4, 0, 14304, 4, 0, 15424, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267712251471030_988_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267712251471030_988_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2d54e87b --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267712251471030_988_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,501 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((80 << 6) | (i0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (counter3 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((278 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((311 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (326 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (335 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (349 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((374 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((389 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (401 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (408 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + uint counter8 = 0; + while ((counter8 < 3)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((426 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter8 == 2)) { + break; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (436 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (446 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (456 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i9 = 0; (i9 < 2); i9 = (i9 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((473 << 6) | (i9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i10 = 0; (i10 < 2); i10 = (i10 + 1)) { + if 
((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((488 << 6) | (i9 << 4)) | (i10 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (495 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (500 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (507 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 87 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3204, 1, 0, 3208, 1, 0, 3220, 1, 0, 3224, 1, 0, 3236, 1, 0, 3240, 1, 0, 3904, 1, 0, 3920, 1, 0, 3936, 1, 0, 5824, 1, 0, 5840, 1, 0, 5856, 1, 0, 10176, 2, 0, 14656, 4, 0, 15104, 8, 0, 18752, 1, 0, 19920, 1, 0, 19936, 1, 0, 19952, 1, 0, 20864, 1, 0, 22336, 4, 0, 23936, 4, 0, 23952, 4, 0, 24896, 4, 0, 24912, 4, 0, 27904, 8, 0, 28544, 1, 0, 32000, 4, 0, 32448, 8, 0] + - Name: _wave_op_index + 
Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267713295191872_990_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267713295191872_990_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..601d2fae --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267713295191872_990_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,328 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((110 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((150 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((167 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | 
(i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((284 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (295 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + 
WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1296, 1, 0, 1312, 1, 0, 1328, 1, 0, 4752, 1, 0, 4768, 1, 0, 4784, 1, 0, 9620, 2, 0, 9624, 2, 0, 9628, 2, 0, 9636, 2, 0, 9640, 2, 0, 9644, 2, 0, 11456, 7, 0, 11456, 7, 0, 11456, 7, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267713489900465_991_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267713489900465_991_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..372f906a --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267713489900465_991_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,211 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 2)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || 
(WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() >= 3)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3968, 4, 0, 3984, 4, 0, 4000, 4, 0, 5056, 4, 0, 5072, 4, 0, 5088, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + 
Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267713542877036_992_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267713542877036_992_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7180f114 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267713542877036_992_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,167 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if 
((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } + } + case 3: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 2564, 4, 0, 2568, 4, 0, 2572, 4, 0, 2580, 4, 0, 2584, 4, 0, 2588, 4, 0, 2596, 4, 0, 2600, 4, 0, 2604, 4, 0, 5312, 8, 0, 6464, 8, 0, 7296, 12, 0, 7296, 12, 0, 8000, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267713619393798_993_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267713619393798_993_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d7ace633 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267713619393798_993_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,170 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((67 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((84 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2000, 1, 0, 2016, 1, 0, 2944, 1, 0, 5696, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267713829251159_995_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267713829251159_995_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c0b942ef --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267713829251159_995_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,226 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 81 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 2064, 4, 0, 2080, 4, 0, 2704, 4, 0, 2720, 4, 0, 4736, 8, 0, 5632, 5, 0, 5632, 5, 0, 6592, 3, 0, 6592, 3, 0, 6608, 3, 0, 6608, 3, 0, 6624, 3, 0, 6624, 3, 0, 7232, 2, 0, 7248, 2, 0, 7264, 2, 0, 7680, 12, 0, 7680, 12, 0, 7696, 12, 0, 7696, 12, 0, 7712, 12, 0, 7712, 12, 0, 8320, 5, 0, 8320, 5, 0, 8896, 5, 0, 8896, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267713940970371_996_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267713940970371_996_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d9efe7c1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267713940970371_996_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,213 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; 
+ } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2768, 8, 0, 3472, 8, 0, 4800, 8, 0, 5248, 1, 0, 7696, 1, 0, 7712, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267714142644506_998_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267714142644506_998_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1ebd0399 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267714142644506_998_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,484 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((67 << 6) | (counter0 << 4)) | (i1 << 
2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (counter0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 2)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((189 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((284 << 6) | (counter6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result 
= (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((302 << 6) | (counter6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((313 << 6) | (counter6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((322 << 6) | (counter6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((339 << 6) | (counter6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (359 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() 
== 2)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (372 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (398 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (413 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (420 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (438 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (451 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (462 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (472 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (498 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (513 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 
= (counter8 + 1); + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((527 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter8 == 1)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (537 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (550 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (557 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 81 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1424, 1, 0, 3088, 5, 0, 3088, 5, 0, 4304, 5, 0, 4304, 5, 0, 4308, 5, 0, 4308, 5, 0, 5264, 5, 0, 5264, 5, 0, 13248, 1, 0, 15440, 1, 0, 15456, 1, 0, 15472, 1, 0, 18192, 2, 0, 18196, 2, 0, 18200, 2, 0, 18208, 2, 0, 18212, 2, 0, 18216, 2, 0, 20624, 2, 0, 20628, 2, 0, 
20632, 2, 0, 20640, 2, 0, 20644, 2, 0, 20648, 2, 0, 22976, 4, 0, 35648, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267714297693638_999_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267714297693638_999_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d66048ad --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267714297693638_999_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,163 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 9, 0, 1600, 9, 0, 2240, 1, 0, 3984, 4, 0, 4000, 4, 0, 5264, 4, 0, 5280, 4, 0, 5968, 4, 0, 5984, 4, 0, 7296, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize4BitTracking/program_1756267714358499711_1000_increment_0_WaveParticipantBitTracking.test b/test/WaveSize4BitTracking/program_1756267714358499711_1000_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6a00cce1 --- /dev/null +++ b/test/WaveSize4BitTracking/program_1756267714358499711_1000_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,339 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(4, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 2)) || (WaveGetLaneIndex() == 2))) { + result = 
(result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 3)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 2)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 3))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((279 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((298 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((311 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((322 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((333 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (342 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 4 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 201 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 9, 0, 576, 9, 0, 4288, 2, 0, 4304, 2, 0, 4320, 2, 0, 4736, 2, 0, 4752, 2, 0, 4768, 2, 0, 10240, 1, 0, 10256, 1, 0, 10272, 1, 0, 11904, 4, 0, 11920, 4, 0, 11936, 4, 0, 13056, 1, 0, 13072, 1, 0, 13088, 1, 0, 13760, 1, 0, 13776, 1, 0, 13792, 1, 0, 14848, 1, 0, 14864, 1, 0, 14880, 1, 0, 15296, 12, 0, 15296, 12, 0, 15312, 12, 0, 15312, 12, 0, 15328, 12, 0, 15328, 12, 0, 16704, 1, 0, 16720, 1, 0, 17856, 5, 0, 17856, 5, 0, 17872, 5, 0, 17872, 5, 0, 19072, 5, 0, 19072, 5, 0, 19076, 5, 0, 19076, 5, 0, 19080, 5, 0, 19080, 5, 0, 19088, 5, 0, 19088, 5, 0, 19092, 5, 0, 19092, 5, 0, 19096, 5, 0, 19096, 5, 0, 19904, 5, 0, 19904, 5, 0, 19908, 5, 0, 19908, 5, 0, 19912, 5, 0, 19912, 5, 0, 19920, 5, 0, 19920, 5, 0, 19924, 5, 0, 19924, 5, 0, 19928, 5, 0, 19928, 5, 0, 20608, 5, 0, 20608, 5, 0, 20624, 5, 0, 20624, 5, 0, 21312, 5, 0, 21312, 5, 0, 21328, 5, 0, 21328, 5, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756419738478984882_1_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756419738478984882_1_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..da5a5484 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756419738478984882_1_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,88 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 37)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756419999439376835_3_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756419999439376835_3_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e37c597b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756419999439376835_3_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,189 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 51))) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMin(3)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 57 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 
68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 2304, 0, 1073741824, 3840, 0, 1073741824, 4992, 0, 1207959552, 4992, 0, 1207959552, 8576, 0, 65, 8576, 0, 65] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756420000400941683_4_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756420000400941683_4_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e6068066 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756420000400941683_4_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,123 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 52))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 8) || 
(WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 47))) { + if (((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1536, 1, 0, 4032, 0, 4096, 4048, 0, 4096, 4064, 0, 4096] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 
+ Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756420000646467764_5_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756420000646467764_5_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ab02afc0 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756420000646467764_5_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,244 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((34 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 49)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 53))) { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((176 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 
1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 2496, 1717986918, 1717986918, 3408, 32, 0, 3424, 32, 0, 4624, 1024, 33554432, 4624, 1024, 33554432, 4628, 1024, 33554432, 4628, 1024, 33554432, 4632, 1024, 33554432, 4632, 1024, 33554432, 4640, 1024, 33554432, 4640, 1024, 33554432, 4644, 1024, 33554432, 4644, 1024, 33554432, 4648, 1024, 33554432, 4648, 1024, 33554432, 5072, 0, 131072, 5088, 0, 131072, 5888, 85, 0, 5888, 85, 0, 5888, 85, 0, 5888, 85, 0, 7360, 73, 0, 7360, 73, 0, 7360, 73, 0, 7936, 272696336, 68174084, 7936, 272696336, 68174084, 7936, 272696336, 68174084, 7936, 272696336, 68174084, 7936, 272696336, 68174084, 7936, 272696336, 68174084, 7936, 272696336, 68174084, 7936, 272696336, 68174084, 7936, 272696336, 68174084, 7936, 272696336, 68174084, 8832, 68174084, 1090785345, 8832, 68174084, 1090785345, 8832, 68174084, 1090785345, 8832, 68174084, 1090785345, 8832, 68174084, 1090785345, 8832, 68174084, 1090785345, 8832, 68174084, 1090785345, 8832, 68174084, 1090785345, 8832, 68174084, 1090785345, 8832, 68174084, 1090785345, 8832, 68174084, 1090785345] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756420063962211394_6_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756420063962211394_6_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b0f5503e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756420063962211394_6_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,198 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 48)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 61))) { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 57)) || 
(WaveGetLaneIndex() == 61))) { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 237 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1472, 2181570690, 545392672, 1472, 2181570690, 545392672, 1472, 2181570690, 545392672, 1472, 2181570690, 545392672, 1472, 2181570690, 545392672, 1472, 2181570690, 545392672, 1472, 2181570690, 545392672, 1472, 2181570690, 545392672, 1472, 2181570690, 545392672, 1472, 2181570690, 545392672, 1472, 2181570690, 545392672, 4224, 2181570690, 545392672, 4224, 2181570690, 545392672, 4224, 2181570690, 545392672, 4224, 2181570690, 545392672, 4224, 2181570690, 545392672, 4224, 2181570690, 545392672, 4224, 2181570690, 545392672, 4224, 2181570690, 545392672, 4224, 2181570690, 
545392672, 4224, 2181570690, 545392672, 4224, 2181570690, 545392672, 4240, 2181570690, 545392672, 4240, 2181570690, 545392672, 4240, 2181570690, 545392672, 4240, 2181570690, 545392672, 4240, 2181570690, 545392672, 4240, 2181570690, 545392672, 4240, 2181570690, 545392672, 4240, 2181570690, 545392672, 4240, 2181570690, 545392672, 4240, 2181570690, 545392672, 4240, 2181570690, 545392672, 4256, 2181570690, 545392672, 4256, 2181570690, 545392672, 4256, 2181570690, 545392672, 4256, 2181570690, 545392672, 4256, 2181570690, 545392672, 4256, 2181570690, 545392672, 4256, 2181570690, 545392672, 4256, 2181570690, 545392672, 4256, 2181570690, 545392672, 4256, 2181570690, 545392672, 4256, 2181570690, 545392672, 4928, 2181570690, 545392672, 4928, 2181570690, 545392672, 4928, 2181570690, 545392672, 4928, 2181570690, 545392672, 4928, 2181570690, 545392672, 4928, 2181570690, 545392672, 4928, 2181570690, 545392672, 4928, 2181570690, 545392672, 4928, 2181570690, 545392672, 4928, 2181570690, 545392672, 4928, 2181570690, 545392672, 6208, 0, 1073741824, 6784, 545392672, 136348168, 6784, 545392672, 136348168, 6784, 545392672, 136348168, 6784, 545392672, 136348168, 6784, 545392672, 136348168, 6784, 545392672, 136348168, 6784, 545392672, 136348168, 6784, 545392672, 136348168, 6784, 545392672, 136348168, 6784, 545392672, 136348168, 10048, 68174084, 17043521, 10048, 68174084, 17043521, 10048, 68174084, 17043521, 10048, 68174084, 17043521, 10048, 68174084, 17043521, 10048, 68174084, 17043521, 10048, 68174084, 17043521, 10048, 68174084, 17043521, 10048, 68174084, 17043521, 10048, 68174084, 17043521] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index 
+ Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756420104064267926_9_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756420104064267926_9_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b291b38a --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756420104064267926_9_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,123 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 35))) { + if ((((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 61)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((90 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 375 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 0, 67108864, 1104, 0, 67108864, 3904, 0, 536870912, 3920, 0, 536870912, 3936, 0, 536870912, 5060, 682, 2818572288, 5060, 682, 2818572288, 5060, 682, 2818572288, 5060, 682, 2818572288, 5060, 682, 2818572288, 5060, 682, 2818572288, 5060, 682, 2818572288, 5060, 682, 2818572288, 5064, 682, 2818572288, 5064, 682, 2818572288, 5064, 682, 2818572288, 5064, 682, 2818572288, 5064, 682, 2818572288, 5064, 682, 2818572288, 5064, 682, 2818572288, 5064, 682, 2818572288, 5076, 682, 2818572288, 5076, 682, 2818572288, 5076, 682, 2818572288, 5076, 682, 2818572288, 5076, 682, 2818572288, 5076, 682, 2818572288, 5076, 682, 2818572288, 5076, 682, 
2818572288, 5080, 682, 2818572288, 5080, 682, 2818572288, 5080, 682, 2818572288, 5080, 682, 2818572288, 5080, 682, 2818572288, 5080, 682, 2818572288, 5080, 682, 2818572288, 5080, 682, 2818572288, 5092, 682, 2818572288, 5092, 682, 2818572288, 5092, 682, 2818572288, 5092, 682, 2818572288, 5092, 682, 2818572288, 5092, 682, 2818572288, 5092, 682, 2818572288, 5092, 682, 2818572288, 5096, 682, 2818572288, 5096, 682, 2818572288, 5096, 682, 2818572288, 5096, 682, 2818572288, 5096, 682, 2818572288, 5096, 682, 2818572288, 5096, 682, 2818572288, 5096, 682, 2818572288, 5764, 43690, 2852126720, 5764, 43690, 2852126720, 5764, 43690, 2852126720, 5764, 43690, 2852126720, 5764, 43690, 2852126720, 5764, 43690, 2852126720, 5764, 43690, 2852126720, 5764, 43690, 2852126720, 5764, 43690, 2852126720, 5764, 43690, 2852126720, 5764, 43690, 2852126720, 5764, 43690, 2852126720, 5768, 43690, 2852126720, 5768, 43690, 2852126720, 5768, 43690, 2852126720, 5768, 43690, 2852126720, 5768, 43690, 2852126720, 5768, 43690, 2852126720, 5768, 43690, 2852126720, 5768, 43690, 2852126720, 5768, 43690, 2852126720, 5768, 43690, 2852126720, 5768, 43690, 2852126720, 5768, 43690, 2852126720, 5780, 43690, 2852126720, 5780, 43690, 2852126720, 5780, 43690, 2852126720, 5780, 43690, 2852126720, 5780, 43690, 2852126720, 5780, 43690, 2852126720, 5780, 43690, 2852126720, 5780, 43690, 2852126720, 5780, 43690, 2852126720, 5780, 43690, 2852126720, 5780, 43690, 2852126720, 5780, 43690, 2852126720, 5784, 43690, 2852126720, 5784, 43690, 2852126720, 5784, 43690, 2852126720, 5784, 43690, 2852126720, 5784, 43690, 2852126720, 5784, 43690, 2852126720, 5784, 43690, 2852126720, 5784, 43690, 2852126720, 5784, 43690, 2852126720, 5784, 43690, 2852126720, 5784, 43690, 2852126720, 5784, 43690, 2852126720, 5796, 43690, 2852126720, 5796, 43690, 2852126720, 5796, 43690, 2852126720, 5796, 43690, 2852126720, 5796, 43690, 2852126720, 5796, 43690, 2852126720, 5796, 43690, 2852126720, 5796, 43690, 2852126720, 5796, 43690, 2852126720, 5796, 
43690, 2852126720, 5796, 43690, 2852126720, 5796, 43690, 2852126720, 5800, 43690, 2852126720, 5800, 43690, 2852126720, 5800, 43690, 2852126720, 5800, 43690, 2852126720, 5800, 43690, 2852126720, 5800, 43690, 2852126720, 5800, 43690, 2852126720, 5800, 43690, 2852126720, 5800, 43690, 2852126720, 5800, 43690, 2852126720, 5800, 43690, 2852126720, 5800, 43690, 2852126720] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756420442139048993_12_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756420442139048993_12_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e54bb167 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756420442139048993_12_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,255 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 1))) { + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 52)) { + if ((WaveGetLaneIndex() < 28)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 35)) { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 51)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 
23)) { + if ((WaveGetLaneIndex() >= 57)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 34)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((172 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter2 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 36)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((217 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- 
+Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2880, 2097152, 0, 2896, 2097152, 0, 2912, 2097152, 0, 7424, 604127268, 1076101696, 7424, 604127268, 1076101696, 7424, 604127268, 1076101696, 7424, 604127268, 1076101696, 7424, 604127268, 1076101696, 7424, 604127268, 1076101696, 7424, 604127268, 1076101696, 7424, 604127268, 1076101696, 7424, 604127268, 1076101696, 7424, 604127268, 1076101696, 7424, 604127268, 1076101696, 7872, 978670, 0, 7872, 978670, 0, 7872, 978670, 0, 7872, 978670, 0, 7872, 978670, 0, 7872, 978670, 0, 7872, 978670, 0, 7872, 978670, 0, 7872, 978670, 0, 7872, 978670, 0, 7872, 978670, 0, 7872, 978670, 0, 7872, 978670, 0, 7872, 978670, 0, 7872, 978670, 0, 13888, 10240, 1073741824, 13888, 10240, 1073741824, 13888, 10240, 1073741824, 13904, 10240, 1073741824, 13904, 10240, 1073741824, 13904, 10240, 1073741824, 13920, 10240, 1073741824, 13920, 10240, 1073741824, 13920, 10240, 1073741824] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756420617887238247_14_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756420617887238247_14_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c0c30ee6 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756420617887238247_14_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,198 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 26)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 59))) { + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 36)) { + result = (result + WaveActiveSum(3)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 136348168, 2181570690, 1344, 136348168, 2181570690, 1344, 136348168, 2181570690, 1344, 136348168, 2181570690, 1344, 136348168, 2181570690, 1344, 136348168, 2181570690, 1344, 136348168, 2181570690, 1344, 136348168, 2181570690, 1344, 136348168, 2181570690, 1344, 136348168, 2181570690, 1344, 136348168, 2181570690, 2816, 272696336, 68174084, 2816, 272696336, 68174084, 2816, 272696336, 68174084, 2816, 272696336, 68174084, 2816, 272696336, 68174084, 2816, 272696336, 68174084, 2816, 272696336, 68174084, 2816, 272696336, 68174084, 2816, 272696336, 68174084, 2816, 272696336, 68174084, 7744, 613566756, 1227133513, 7744, 613566756, 1227133513, 7744, 613566756, 1227133513, 7744, 613566756, 1227133513, 7744, 613566756, 1227133513, 7744, 613566756, 1227133513, 7744, 613566756, 1227133513, 7744, 613566756, 1227133513, 7744, 613566756, 1227133513, 
7744, 613566756, 1227133513, 7744, 613566756, 1227133513, 7744, 613566756, 1227133513, 7744, 613566756, 1227133513, 7744, 613566756, 1227133513, 7744, 613566756, 1227133513, 7744, 613566756, 1227133513, 7744, 613566756, 1227133513, 7744, 613566756, 1227133513, 7744, 613566756, 1227133513, 7744, 613566756, 1227133513, 7744, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756420641382588741_16_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756420641382588741_16_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d8b06e1f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756420641382588741_16_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,455 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 18)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 49)) { + if ((WaveGetLaneIndex() >= 32)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
+ if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + 
case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 46))) { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 50)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 52)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + break; + } + } + break; + } + case 2: 
{ + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 42)) { + if ((WaveGetLaneIndex() >= 36)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter3 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() < 26)) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((304 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 62)) { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((314 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((321 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 492 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 4369, 268435456, 1344, 4369, 268435456, 1344, 4369, 268435456, 1344, 4369, 268435456, 1344, 4369, 268435456, 1360, 4369, 268435456, 1360, 4369, 268435456, 1360, 4369, 268435456, 1360, 4369, 268435456, 1360, 4369, 268435456, 1376, 4369, 268435456, 1376, 4369, 268435456, 1376, 4369, 268435456, 1376, 4369, 268435456, 1376, 4369, 268435456, 2176, 1, 0, 2192, 1, 0, 2208, 1, 0, 2752, 65552, 0, 2752, 65552, 0, 2768, 65552, 0, 2768, 65552, 0, 2784, 65552, 0, 2784, 65552, 0, 3072, 65808, 0, 3072, 65808, 0, 3072, 65808, 0, 3088, 65808, 0, 3088, 65808, 0, 3088, 65808, 0, 3104, 65808, 0, 3104, 65808, 0, 3104, 65808, 0, 4416, 1145324612, 1145324612, 4416, 1145324612, 1145324612, 4416, 
1145324612, 1145324612, 4416, 1145324612, 1145324612, 4416, 1145324612, 1145324612, 4416, 1145324612, 1145324612, 4416, 1145324612, 1145324612, 4416, 1145324612, 1145324612, 4416, 1145324612, 1145324612, 4416, 1145324612, 1145324612, 4416, 1145324612, 1145324612, 4416, 1145324612, 1145324612, 4416, 1145324612, 1145324612, 4416, 1145324612, 1145324612, 4416, 1145324612, 1145324612, 4416, 1145324612, 1145324612, 4864, 838860, 0, 4864, 838860, 0, 4864, 838860, 0, 4864, 838860, 0, 4864, 838860, 0, 4864, 838860, 0, 4864, 838860, 0, 4864, 838860, 0, 4864, 838860, 0, 4864, 838860, 0, 5760, 85, 0, 5760, 85, 0, 5760, 85, 0, 5760, 85, 0, 6592, 0, 2454192128, 6592, 0, 2454192128, 6592, 0, 2454192128, 6592, 0, 2454192128, 6592, 0, 2454192128, 9088, 136348168, 8322, 9088, 136348168, 8322, 9088, 136348168, 8322, 9088, 136348168, 8322, 9088, 136348168, 8322, 9088, 136348168, 8322, 9088, 136348168, 8322, 9088, 136348168, 8322, 9728, 1, 0, 10624, 1074004032, 1024, 10624, 1074004032, 1024, 10624, 1074004032, 1024, 10624, 1074004032, 1024, 11072, 32776, 0, 11072, 32776, 0, 16400, 0, 1227132928, 16400, 0, 1227132928, 16400, 0, 1227132928, 16400, 0, 1227132928, 16400, 0, 1227132928, 16400, 0, 1227132928, 16400, 0, 1227132928, 16416, 0, 1227132928, 16416, 0, 1227132928, 16416, 0, 1227132928, 16416, 0, 1227132928, 16416, 0, 1227132928, 16416, 0, 1227132928, 16416, 0, 1227132928, 16848, 0, 1227128832, 16848, 0, 1227128832, 16848, 0, 1227128832, 16848, 0, 1227128832, 16848, 0, 1227128832, 16848, 0, 1227128832, 16864, 0, 1227128832, 16864, 0, 1227128832, 16864, 0, 1227128832, 16864, 0, 1227128832, 16864, 0, 1227128832, 16864, 0, 1227128832, 17920, 73, 0, 17920, 73, 0, 17920, 73, 0, 18496, 272696336, 68174084, 18496, 272696336, 68174084, 18496, 272696336, 68174084, 18496, 272696336, 68174084, 18496, 272696336, 68174084, 18496, 272696336, 68174084, 18496, 272696336, 68174084, 18496, 272696336, 68174084, 18496, 272696336, 68174084, 18496, 272696336, 68174084, 19456, 9586980, 0, 19456, 9586980, 
0, 19456, 9586980, 0, 19456, 9586980, 0, 19456, 9586980, 0, 19456, 9586980, 0, 19456, 9586980, 0, 19456, 9586980, 0, 19472, 9586980, 0, 19472, 9586980, 0, 19472, 9586980, 0, 19472, 9586980, 0, 19472, 9586980, 0, 19472, 9586980, 0, 19472, 9586980, 0, 19472, 9586980, 0, 19488, 9586980, 0, 19488, 9586980, 0, 19488, 9586980, 0, 19488, 9586980, 0, 19488, 9586980, 0, 19488, 9586980, 0, 19488, 9586980, 0, 19488, 9586980, 0, 20544, 0, 1227128832, 20544, 0, 1227128832, 20544, 0, 1227128832, 20544, 0, 1227128832, 20544, 0, 1227128832, 20544, 0, 1227128832, 20560, 0, 1227128832, 20560, 0, 1227128832, 20560, 0, 1227128832, 20560, 0, 1227128832, 20560, 0, 1227128832, 20560, 0, 1227128832, 20576, 0, 1227128832, 20576, 0, 1227128832, 20576, 0, 1227128832, 20576, 0, 1227128832, 20576, 0, 1227128832, 20576, 0, 1227128832] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756420774700692718_19_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756420774700692718_19_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2758f6c1 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756420774700692718_19_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,137 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 38))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 21)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((75 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 41))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((120 << 6) | (i0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((135 << 6) | (i0 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1408, 1048576, 1048592, 1408, 1048576, 1048592, 1408, 1048576, 1048592, 1424, 1048576, 1048592, 1424, 1048576, 1048592, 1424, 1048576, 1048592, 4800, 8192, 0, 4804, 8192, 0, 4808, 8192, 0, 4816, 8192, 0, 4820, 8192, 0, 4824, 8192, 0, 7684, 16809984, 0, 7684, 16809984, 0, 7688, 16809984, 0, 7688, 16809984, 0, 7700, 16809984, 0, 7700, 16809984, 0, 7704, 16809984, 0, 7704, 16809984, 0, 8644, 294912, 0, 8644, 294912, 0, 8648, 294912, 0, 8648, 294912, 0, 8660, 294912, 0, 8660, 294912, 0, 8664, 294912, 0, 8664, 294912, 0, 9600, 83886080, 8, 9600, 83886080, 8, 9600, 83886080, 8, 9616, 83886080, 8, 9616, 83886080, 8, 9616, 83886080, 8] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756420945702689176_25_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756420945702689176_25_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..32f548e3 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756420945702689176_25_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,210 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 26)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 48)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 62))) { + if (((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + 
Size: 267 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2368, 279620, 1140850688, 2368, 279620, 1140850688, 2368, 279620, 1140850688, 2368, 279620, 1140850688, 2368, 279620, 1140850688, 2368, 279620, 1140850688, 2368, 279620, 1140850688, 2384, 279620, 1140850688, 2384, 279620, 1140850688, 2384, 279620, 1140850688, 2384, 279620, 1140850688, 2384, 279620, 1140850688, 2384, 279620, 1140850688, 2384, 279620, 1140850688, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3984, 1145324612, 1145324612, 3984, 1145324612, 1145324612, 3984, 1145324612, 1145324612, 3984, 1145324612, 1145324612, 3984, 1145324612, 1145324612, 3984, 1145324612, 1145324612, 3984, 1145324612, 1145324612, 3984, 1145324612, 1145324612, 3984, 1145324612, 1145324612, 3984, 1145324612, 1145324612, 3984, 1145324612, 1145324612, 3984, 1145324612, 1145324612, 3984, 1145324612, 1145324612, 3984, 1145324612, 1145324612, 3984, 1145324612, 1145324612, 3984, 1145324612, 1145324612, 4672, 4, 1073741824, 4672, 4, 1073741824, 4688, 4, 1073741824, 4688, 4, 1073741824, 5824, 8947848, 0, 5824, 8947848, 0, 5824, 8947848, 0, 5824, 8947848, 0, 5824, 8947848, 0, 5824, 8947848, 0, 5840, 8947848, 0, 5840, 8947848, 0, 5840, 8947848, 0, 5840, 8947848, 0, 5840, 8947848, 0, 5840, 8947848, 0, 5856, 8947848, 0, 5856, 8947848, 0, 5856, 8947848, 0, 5856, 8947848, 0, 5856, 8947848, 0, 5856, 8947848, 0, 6272, 0, 2290614272, 6272, 0, 2290614272, 6272, 0, 2290614272, 6272, 0, 2290614272, 6288, 0, 2290614272, 6288, 0, 2290614272, 6288, 0, 2290614272, 6288, 0, 
2290614272, 6304, 0, 2290614272, 6304, 0, 2290614272, 6304, 0, 2290614272, 6304, 0, 2290614272, 9088, 0, 1073741824, 9104, 0, 1073741824, 9120, 0, 1073741824, 11264, 1073742080, 16778240, 11264, 1073742080, 16778240, 11264, 1073742080, 16778240, 11264, 1073742080, 16778240] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756420953331366380_26_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756420953331366380_26_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b3182e5c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756420953331366380_26_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756420981936595767_28_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756420981936595767_28_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7c0a1f29 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756420981936595767_28_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,180 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 55)) { + if ((WaveGetLaneIndex() < 27)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 51)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 30)) || 
(WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() < 26)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 32))) { + if ((((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((155 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 363 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2048, 0, 268435456, 2064, 0, 
268435456, 3776, 0, 8388608, 3792, 0, 8388608, 5760, 524544, 4, 5760, 524544, 4, 5760, 524544, 4, 7744, 33558529, 2048, 7744, 33558529, 2048, 7744, 33558529, 2048, 7744, 33558529, 2048, 7760, 33558529, 2048, 7760, 33558529, 2048, 7760, 33558529, 2048, 7760, 33558529, 2048, 11392, 0, 128, 11408, 0, 128, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11968, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 11984, 2863311530, 2796074, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 
1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12544, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 12560, 1414878533, 5592404, 13248, 128, 128, 13248, 128, 128] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756420989733302489_29_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756420989733302489_29_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c0f5b53a --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756420989733302489_29_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,183 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 52))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 49))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((131 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [8384, 0, 1048576, 8388, 0, 1048576, 8400, 0, 1048576, 8404, 0, 1048576] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756421633455627722_32_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756421633455627722_32_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f7e3154b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756421633455627722_32_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,314 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 56))) { + if (((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 58))) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 30)) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 44)) { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 342 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2688, 65535, 4278190080, 2432, 2147483648, 1048576, 2432, 2147483648, 1048576, 2048, 
715784192, 11184810, 2048, 715784192, 11184810, 2048, 715784192, 11184810, 2048, 715784192, 11184810, 2048, 715784192, 11184810, 2048, 715784192, 11184810, 2048, 715784192, 11184810, 2048, 715784192, 11184810, 2048, 715784192, 11184810, 2048, 715784192, 11184810, 2048, 715784192, 11184810, 2048, 715784192, 11184810, 2048, 715784192, 11184810, 2048, 715784192, 11184810, 2048, 715784192, 11184810, 2048, 715784192, 11184810, 2048, 715784192, 11184810, 2048, 715784192, 11184810, 2048, 715784192, 11184810, 3328, 17, 0, 3328, 17, 0, 14016, 537002016, 2097664, 14016, 537002016, 2097664, 14016, 537002016, 2097664, 14016, 537002016, 2097664, 14016, 537002016, 2097664, 14032, 537002016, 2097664, 14032, 537002016, 2097664, 14032, 537002016, 2097664, 14032, 537002016, 2097664, 14032, 537002016, 2097664, 14048, 537002016, 2097664, 14048, 537002016, 2097664, 14048, 537002016, 2097664, 14048, 537002016, 2097664, 14048, 537002016, 2097664, 14336, 1145324612, 1145324612, 14336, 1145324612, 1145324612, 14336, 1145324612, 1145324612, 14336, 1145324612, 1145324612, 14336, 1145324612, 1145324612, 14336, 1145324612, 1145324612, 14336, 1145324612, 1145324612, 14336, 1145324612, 1145324612, 14336, 1145324612, 1145324612, 14336, 1145324612, 1145324612, 14336, 1145324612, 1145324612, 14336, 1145324612, 1145324612, 14336, 1145324612, 1145324612, 14336, 1145324612, 1145324612, 14336, 1145324612, 1145324612, 14336, 1145324612, 1145324612, 16832, 85, 0, 16832, 85, 0, 16832, 85, 0, 16832, 85, 0, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 
1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765, 17408, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756421638704591217_33_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756421638704591217_33_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b250ff9d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756421638704591217_33_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,102 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 21)) { + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 34)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 576, 2097151, 0, 1616, 1398101, 0, 1616, 1398101, 0, 1616, 1398101, 0, 1616, 1398101, 0, 1616, 1398101, 0, 1616, 1398101, 0, 1616, 1398101, 0, 1616, 1398101, 0, 
1616, 1398101, 0, 1616, 1398101, 0, 1616, 1398101, 0, 2192, 699050, 0, 2192, 699050, 0, 2192, 699050, 0, 2192, 699050, 0, 2192, 699050, 0, 2192, 699050, 0, 2192, 699050, 0, 2192, 699050, 0, 2192, 699050, 0, 2192, 699050, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756421648868254284_35_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756421648868254284_35_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ade2b7fa --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756421648868254284_35_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,226 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 61)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } else { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 27)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 2317918858, 2726963362, 1472, 
2317918858, 2726963362, 1472, 2317918858, 2726963362, 2432, 0, 536870912, 2448, 0, 536870912, 6336, 70737, 268435456, 6336, 70737, 268435456, 6336, 70737, 268435456, 6336, 70737, 268435456, 6336, 70737, 268435456, 6336, 70737, 268435456, 6336, 70737, 268435456, 8192, 21304401, 0, 8192, 21304401, 0, 8192, 21304401, 0, 8192, 21304401, 0, 8192, 21304401, 0, 8192, 21304401, 0, 8192, 21304401, 0, 8192, 21304401, 0, 8192, 21304401, 0, 8208, 21304401, 0, 8208, 21304401, 0, 8208, 21304401, 0, 8208, 21304401, 0, 8208, 21304401, 0, 8208, 21304401, 0, 8208, 21304401, 0, 8208, 21304401, 0, 8208, 21304401, 0, 9600, 81, 340869120, 9600, 81, 340869120, 9600, 81, 340869120, 9600, 81, 340869120, 9600, 81, 340869120, 9600, 81, 340869120, 9600, 81, 340869120, 9600, 81, 340869120, 9600, 81, 340869120, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 9920, 613566756, 1227133513, 10816, 85, 0, 10816, 85, 0, 10816, 85, 0, 10816, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756421673400922884_37_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756421673400922884_37_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f191cf38 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756421673400922884_37_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,300 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 59))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 41))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 58))) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 53))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 52))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result 
+ WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() >= 35)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 46)) { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((264 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 38)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 40)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [8080, 8194, 537001984, 8080, 8194, 537001984, 8080, 8194, 537001984, 8080, 8194, 537001984, 8096, 8194, 537001984, 8096, 8194, 537001984, 8096, 8194, 537001984, 8096, 8194, 537001984, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 10624, 1717986918, 1717986918, 11264, 8, 0, 13760, 8390656, 134250504, 13760, 8390656, 134250504, 13760, 
8390656, 134250504, 13760, 8390656, 134250504, 13760, 8390656, 134250504] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756421765942435380_39_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756421765942435380_39_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c00d9c6a --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756421765942435380_39_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,144 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 55))) { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 43))) { + 
result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5184, 272696336, 68174084, 5184, 272696336, 68174084, 5184, 272696336, 68174084, 5184, 272696336, 68174084, 5184, 272696336, 68174084, 5184, 272696336, 68174084, 5184, 272696336, 68174084, 5184, 272696336, 68174084, 5184, 272696336, 68174084, 5184, 272696336, 68174084] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756421766579413472_40_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756421766579413472_40_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..61e7a92e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756421766579413472_40_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,193 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 54))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 49)) { + if ((WaveGetLaneIndex() >= 32)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 62))) { + if (((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 25)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 39)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() == 31)) { + uint counter1 = 0; + while ((counter1 < 2)) { + 
counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 33)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((137 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 21)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 0, 285212672, 1344, 0, 285212672, 5184, 4, 1145044992, 5184, 4, 1145044992, 5184, 4, 1145044992, 5184, 4, 1145044992, 6144, 0, 2290089984, 6144, 0, 2290089984, 6144, 0, 2290089984, 6160, 0, 2290089984, 6160, 0, 2290089984, 6160, 0, 2290089984, 6176, 0, 2290089984, 6176, 0, 2290089984, 6176, 0, 2290089984, 6592, 0, 2290089984, 6592, 0, 2290089984, 6592, 0, 2290089984, 6608, 0, 2290089984, 6608, 0, 2290089984, 6608, 0, 2290089984, 6624, 0, 2290089984, 6624, 0, 2290089984, 6624, 0, 2290089984] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] 
+Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756421773213244416_42_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756421773213244416_42_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..52479d00 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756421773213244416_42_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,245 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 59))) { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 43))) { + if ((WaveGetLaneIndex() == 62)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((92 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 63)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((101 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((WaveGetLaneIndex() == 45)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 46))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 50)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 52))) { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((212 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + if ((i4 == 2)) { + break; + } + } + if ((counter3 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 195 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 3200, 1, 268435456, 3200, 1, 268435456, 5008, 0, 572653568, 5008, 0, 572653568, 5008, 0, 572653568, 5008, 0, 572653568, 5024, 0, 572653568, 5024, 0, 572653568, 5024, 0, 572653568, 5024, 0, 572653568, 5908, 34, 0, 5908, 34, 0, 5912, 34, 0, 5912, 34, 0, 5916, 34, 
0, 5916, 34, 0, 5924, 34, 0, 5924, 34, 0, 5928, 34, 0, 5928, 34, 0, 5932, 34, 0, 5932, 34, 0, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 13584, 8, 2290089984, 13584, 8, 2290089984, 13584, 8, 2290089984, 13584, 8, 2290089984, 13588, 8, 2290089984, 13588, 8, 2290089984, 13588, 8, 2290089984, 13588, 8, 2290089984, 13592, 8, 2290089984, 13592, 8, 2290089984, 13592, 8, 2290089984, 13592, 8, 2290089984, 13600, 8, 2290089984, 13600, 8, 2290089984, 13600, 8, 2290089984, 13600, 8, 2290089984, 13604, 8, 2290089984, 13604, 8, 2290089984, 13604, 8, 2290089984, 13604, 8, 2290089984, 13608, 8, 2290089984, 13608, 8, 2290089984, 13608, 8, 2290089984, 13608, 8, 2290089984, 14848, 8, 2147483648, 14848, 8, 2147483648] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756421784365392146_43_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756421784365392146_43_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..00ef6d68 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756421784365392146_43_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,209 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 52))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 23)) { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter1 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((126 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 63))) { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((144 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((153 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 633 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 3, 
2147483648, 1216, 3, 2147483648, 1216, 3, 2147483648, 1856, 17, 0, 1856, 17, 0, 2752, 1092, 1145044992, 2752, 1092, 1145044992, 2752, 1092, 1145044992, 2752, 1092, 1145044992, 2752, 1092, 1145044992, 2752, 1092, 1145044992, 6144, 2863300608, 699050, 6144, 2863300608, 699050, 6144, 2863300608, 699050, 6144, 2863300608, 699050, 6144, 2863300608, 699050, 6144, 2863300608, 699050, 6144, 2863300608, 699050, 6144, 2863300608, 699050, 6144, 2863300608, 699050, 6144, 2863300608, 699050, 6144, 2863300608, 699050, 6144, 2863300608, 699050, 6144, 2863300608, 699050, 6144, 2863300608, 699050, 6144, 2863300608, 699050, 6144, 2863300608, 699050, 6144, 2863300608, 699050, 6144, 2863300608, 699050, 6144, 2863300608, 699050, 7056, 0, 1015808, 7056, 0, 1015808, 7056, 0, 1015808, 7056, 0, 1015808, 7056, 0, 1015808, 7072, 0, 1015808, 7072, 0, 1015808, 7072, 0, 1015808, 7072, 0, 1015808, 7072, 0, 1015808, 8084, 1431650304, 349525, 8084, 1431650304, 349525, 8084, 1431650304, 349525, 8084, 1431650304, 349525, 8084, 1431650304, 349525, 8084, 1431650304, 349525, 8084, 1431650304, 349525, 8084, 1431650304, 349525, 8084, 1431650304, 349525, 8084, 1431650304, 349525, 8084, 1431650304, 349525, 8084, 1431650304, 349525, 8084, 1431650304, 349525, 8084, 1431650304, 349525, 8084, 1431650304, 349525, 8084, 1431650304, 349525, 8084, 1431650304, 349525, 8084, 1431650304, 349525, 8084, 1431650304, 349525, 8088, 1431650304, 349525, 8088, 1431650304, 349525, 8088, 1431650304, 349525, 8088, 1431650304, 349525, 8088, 1431650304, 349525, 8088, 1431650304, 349525, 8088, 1431650304, 349525, 8088, 1431650304, 349525, 8088, 1431650304, 349525, 8088, 1431650304, 349525, 8088, 1431650304, 349525, 8088, 1431650304, 349525, 8088, 1431650304, 349525, 8088, 1431650304, 349525, 8088, 1431650304, 349525, 8088, 1431650304, 349525, 8088, 1431650304, 349525, 8088, 1431650304, 349525, 8088, 1431650304, 349525, 8100, 1431650304, 349525, 8100, 1431650304, 349525, 8100, 1431650304, 349525, 8100, 1431650304, 349525, 8100, 
1431650304, 349525, 8100, 1431650304, 349525, 8100, 1431650304, 349525, 8100, 1431650304, 349525, 8100, 1431650304, 349525, 8100, 1431650304, 349525, 8100, 1431650304, 349525, 8100, 1431650304, 349525, 8100, 1431650304, 349525, 8100, 1431650304, 349525, 8100, 1431650304, 349525, 8100, 1431650304, 349525, 8100, 1431650304, 349525, 8100, 1431650304, 349525, 8100, 1431650304, 349525, 8104, 1431650304, 349525, 8104, 1431650304, 349525, 8104, 1431650304, 349525, 8104, 1431650304, 349525, 8104, 1431650304, 349525, 8104, 1431650304, 349525, 8104, 1431650304, 349525, 8104, 1431650304, 349525, 8104, 1431650304, 349525, 8104, 1431650304, 349525, 8104, 1431650304, 349525, 8104, 1431650304, 349525, 8104, 1431650304, 349525, 8104, 1431650304, 349525, 8104, 1431650304, 349525, 8104, 1431650304, 349525, 8104, 1431650304, 349525, 8104, 1431650304, 349525, 8104, 1431650304, 349525, 9812, 2863300608, 699050, 9812, 2863300608, 699050, 9812, 2863300608, 699050, 9812, 2863300608, 699050, 9812, 2863300608, 699050, 9812, 2863300608, 699050, 9812, 2863300608, 699050, 9812, 2863300608, 699050, 9812, 2863300608, 699050, 9812, 2863300608, 699050, 9812, 2863300608, 699050, 9812, 2863300608, 699050, 9812, 2863300608, 699050, 9812, 2863300608, 699050, 9812, 2863300608, 699050, 9812, 2863300608, 699050, 9812, 2863300608, 699050, 9812, 2863300608, 699050, 9812, 2863300608, 699050, 9816, 2863300608, 699050, 9816, 2863300608, 699050, 9816, 2863300608, 699050, 9816, 2863300608, 699050, 9816, 2863300608, 699050, 9816, 2863300608, 699050, 9816, 2863300608, 699050, 9816, 2863300608, 699050, 9816, 2863300608, 699050, 9816, 2863300608, 699050, 9816, 2863300608, 699050, 9816, 2863300608, 699050, 9816, 2863300608, 699050, 9816, 2863300608, 699050, 9816, 2863300608, 699050, 9816, 2863300608, 699050, 9816, 2863300608, 699050, 9816, 2863300608, 699050, 9816, 2863300608, 699050, 9828, 2863300608, 699050, 9828, 2863300608, 699050, 9828, 2863300608, 699050, 9828, 2863300608, 699050, 9828, 2863300608, 699050, 
9828, 2863300608, 699050, 9828, 2863300608, 699050, 9828, 2863300608, 699050, 9828, 2863300608, 699050, 9828, 2863300608, 699050, 9828, 2863300608, 699050, 9828, 2863300608, 699050, 9828, 2863300608, 699050, 9828, 2863300608, 699050, 9828, 2863300608, 699050, 9828, 2863300608, 699050, 9828, 2863300608, 699050, 9828, 2863300608, 699050, 9828, 2863300608, 699050, 9832, 2863300608, 699050, 9832, 2863300608, 699050, 9832, 2863300608, 699050, 9832, 2863300608, 699050, 9832, 2863300608, 699050, 9832, 2863300608, 699050, 9832, 2863300608, 699050, 9832, 2863300608, 699050, 9832, 2863300608, 699050, 9832, 2863300608, 699050, 9832, 2863300608, 699050, 9832, 2863300608, 699050, 9832, 2863300608, 699050, 9832, 2863300608, 699050, 9832, 2863300608, 699050, 9832, 2863300608, 699050, 9832, 2863300608, 699050, 9832, 2863300608, 699050, 9832, 2863300608, 699050, 10368, 2863300608, 699050, 10368, 2863300608, 699050, 10368, 2863300608, 699050, 10368, 2863300608, 699050, 10368, 2863300608, 699050, 10368, 2863300608, 699050, 10368, 2863300608, 699050, 10368, 2863300608, 699050, 10368, 2863300608, 699050, 10368, 2863300608, 699050, 10368, 2863300608, 699050, 10368, 2863300608, 699050, 10368, 2863300608, 699050, 10368, 2863300608, 699050, 10368, 2863300608, 699050, 10368, 2863300608, 699050, 10368, 2863300608, 699050, 10368, 2863300608, 699050, 10368, 2863300608, 699050] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756421892747951315_45_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756421892747951315_45_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2136e6e9 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756421892747951315_45_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,268 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 58))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 58))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 25)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if (((WaveGetLaneIndex() == 5) || 
(WaveGetLaneIndex() == 44))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 31)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 59)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 49))) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 63))) { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 45)) || 
(WaveGetLaneIndex() == 3))) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 30))) { + if ((((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 231 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 7, 4227858432, 1088, 7, 4227858432, 1088, 7, 4227858432, 1088, 7, 4227858432, 1088, 7, 4227858432, 1088, 7, 4227858432, 1088, 7, 4227858432, 1088, 7, 4227858432, 1088, 7, 4227858432, 2240, 127, 4227858432, 2240, 127, 4227858432, 2240, 127, 4227858432, 2240, 127, 4227858432, 2240, 127, 4227858432, 2240, 127, 4227858432, 2240, 127, 4227858432, 2240, 127, 4227858432, 2240, 127, 4227858432, 2240, 127, 4227858432, 2240, 127, 4227858432, 2240, 127, 4227858432, 2240, 127, 4227858432, 3200, 127, 0, 3200, 127, 0, 3200, 127, 0, 3200, 127, 0, 3200, 127, 0, 3200, 127, 0, 3200, 127, 0, 3216, 127, 0, 3216, 127, 0, 3216, 127, 0, 3216, 127, 0, 3216, 127, 0, 3216, 127, 0, 3216, 127, 0, 3232, 127, 0, 3232, 127, 0, 3232, 127, 0, 3232, 127, 0, 3232, 127, 0, 3232, 127, 0, 3232, 127, 0, 8512, 32, 0, 10560, 73, 2147483648, 10560, 73, 2147483648, 10560, 73, 2147483648, 10560, 73, 2147483648, 11200, 65, 0, 11200, 65, 0, 11776, 65, 0, 11776, 65, 0, 12480, 73, 2147483648, 12480, 73, 2147483648, 12480, 73, 2147483648, 12480, 73, 2147483648, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + 
Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756421894875327989_46_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756421894875327989_46_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2455cdb0 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756421894875327989_46_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,318 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 46))) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 42)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if (((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 51))) { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 7) || 
(WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((170 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((226 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 23)) { + if ((WaveGetLaneIndex() == 41)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((236 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 49)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((243 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 59))) { + result = (result + 
WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((254 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((265 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 294 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2496, 32768, 0, 5632, 1, 286261248, 5632, 1, 286261248, 5632, 1, 286261248, 5632, 1, 286261248, 7936, 1, 285212672, 7936, 1, 285212672, 7936, 1, 285212672, 8512, 286331152, 69905, 8512, 286331152, 69905, 8512, 286331152, 69905, 8512, 286331152, 69905, 8512, 286331152, 69905, 8512, 286331152, 69905, 8512, 286331152, 69905, 8512, 286331152, 69905, 8512, 286331152, 69905, 8512, 286331152, 69905, 8512, 286331152, 69905, 8512, 286331152, 69905, 9680, 16, 69632, 9680, 16, 69632, 9680, 16, 69632, 9696, 16, 69632, 9696, 16, 69632, 9696, 16, 69632, 9712, 16, 69632, 9712, 16, 69632, 9712, 16, 69632, 10896, 4096, 0, 10900, 4096, 0, 10912, 4096, 0, 10916, 4096, 0, 10928, 4096, 0, 10932, 4096, 0, 11600, 272, 0, 11600, 272, 0, 11616, 272, 0, 11616, 272, 0, 11632, 272, 0, 11632, 272, 0, 14484, 34, 572522496, 14484, 34, 572522496, 14484, 34, 572522496, 14484, 34, 572522496, 14484, 34, 572522496, 14488, 34, 572522496, 14488, 34, 572522496, 14488, 34, 572522496, 14488, 34, 572522496, 14488, 34, 572522496, 14500, 34, 572522496, 14500, 34, 572522496, 14500, 34, 572522496, 14500, 34, 572522496, 14500, 34, 572522496, 14504, 34, 572522496, 14504, 34, 572522496, 14504, 34, 572522496, 14504, 34, 572522496, 14504, 34, 572522496, 16276, 8738, 536870912, 16276, 8738, 536870912, 16276, 8738, 536870912, 16276, 8738, 536870912, 16276, 8738, 536870912, 16280, 8738, 536870912, 16280, 8738, 536870912, 16280, 8738, 536870912, 16280, 8738, 536870912, 16280, 8738, 536870912, 16292, 8738, 536870912, 16292, 8738, 536870912, 16292, 8738, 536870912, 16292, 8738, 536870912, 16292, 8738, 536870912, 16296, 8738, 536870912, 16296, 8738, 536870912, 16296, 8738, 
536870912, 16296, 8738, 536870912, 16296, 8738, 536870912, 17600, 64, 0, 18176, 4195328, 67125252, 18176, 4195328, 67125252, 18176, 4195328, 67125252, 18176, 4195328, 67125252, 18176, 4195328, 67125252, 18496, 67125252, 1074004032, 18496, 67125252, 1074004032, 18496, 67125252, 1074004032, 18496, 67125252, 1074004032, 18496, 67125252, 1074004032, 18496, 67125252, 1074004032, 18944, 559240, 0, 18944, 559240, 0, 18944, 559240, 0, 18944, 559240, 0, 18944, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756421939669057419_47_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756421939669057419_47_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ac3e3bfa --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756421939669057419_47_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,188 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 30)) { + if ((WaveGetLaneIndex() >= 43)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 33))) { + if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 55))) { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((WaveGetLaneIndex() < 29)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 37)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 24)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 58)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 42)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 62)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 174 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 7744, 536870911, 0, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 4294967264, 8192, 0, 
4294967264, 11712, 0, 3221225472, 11712, 0, 3221225472] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756421940347835171_48_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756421940347835171_48_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5f50614d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756421940347835171_48_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,329 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 35))) { + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 52)) { + if ((WaveGetLaneIndex() == 37)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 58))) { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 40))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 
33)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 37))) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((255 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((264 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + 
if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 24))) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (313 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 37)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((340 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((357 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((366 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 177 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6672, 2148007936, 134217736, 6672, 2148007936, 134217736, 6672, 2148007936, 134217736, 6672, 2148007936, 134217736, 6688, 2148007936, 134217736, 6688, 2148007936, 134217736, 6688, 2148007936, 134217736, 6688, 2148007936, 134217736, 6704, 2148007936, 134217736, 6704, 2148007936, 134217736, 6704, 2148007936, 134217736, 6704, 2148007936, 134217736, 10752, 1, 0, 11328, 268501008, 1048832, 11328, 268501008, 1048832, 11328, 268501008, 1048832, 11328, 268501008, 1048832, 11328, 268501008, 1048832, 11648, 1048832, 16781313, 11648, 1048832, 16781313, 11648, 1048832, 16781313, 11648, 1048832, 16781313, 11648, 1048832, 16781313, 13328, 0, 32, 13344, 0, 32, 14928, 0, 2, 14944, 0, 2, 17856, 4195328, 67125252, 17856, 4195328, 67125252, 17856, 4195328, 67125252, 17856, 4195328, 67125252, 17856, 4195328, 67125252, 20480, 0, 1074004032, 20480, 0, 1074004032, 20480, 0, 1074004032, 21776, 34952, 2290614272, 21776, 34952, 2290614272, 21776, 34952, 2290614272, 21776, 34952, 2290614272, 21776, 34952, 2290614272, 21776, 34952, 2290614272, 21776, 34952, 2290614272, 21776, 34952, 2290614272, 21792, 34952, 2290614272, 21792, 34952, 2290614272, 21792, 34952, 2290614272, 21792, 34952, 2290614272, 21792, 34952, 2290614272, 21792, 34952, 2290614272, 21792, 34952, 2290614272, 21792, 34952, 2290614272, 21808, 34952, 2290614272, 21808, 34952, 2290614272, 21808, 34952, 2290614272, 21808, 34952, 2290614272, 21808, 34952, 2290614272, 21808, 34952, 2290614272, 21808, 34952, 2290614272, 21808, 34952, 2290614272] + - Name: _wave_op_index + 
Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756422001878650173_49_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756422001878650173_49_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7e48c91e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756422001878650173_49_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,112 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 52)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 53)) || 
(WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2176, 0, 1048576, 1792, 1073741824, 262144, 1792, 1073741824, 262144, 1536, 4096, 2097153, 1536, 4096, 2097153, 1536, 4096, 2097153, 2816, 85, 0, 2816, 85, 0, 2816, 85, 0, 2816, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756422002620943444_50_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756422002620943444_50_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8c272376 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756422002620943444_50_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,163 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 54)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 58))) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + 
if ((WaveGetLaneIndex() < 31)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756422002797977984_51_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756422002797977984_51_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c547860b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756422002797977984_51_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,68 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 127, 0, 576, 127, 0, 576, 127, 0, 576, 127, 0, 576, 127, 0, 576, 127, 0, 576, 127, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 
+... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756422002985819782_52_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756422002985819782_52_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9accc793 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756422002985819782_52_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,219 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 21)) { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 25)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 57)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 
3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((122 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((137 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 
351 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 3152, 42, 0, 3152, 42, 0, 3152, 42, 0, 3168, 42, 0, 3168, 42, 0, 3168, 42, 0, 3184, 42, 0, 3184, 42, 0, 3184, 42, 0, 4688, 9087658, 0, 4688, 9087658, 0, 4688, 9087658, 0, 4688, 9087658, 0, 4688, 9087658, 0, 4688, 9087658, 0, 4688, 9087658, 0, 4688, 9087658, 0, 4688, 9087658, 0, 4688, 9087658, 0, 4688, 9087658, 0, 4704, 9087658, 0, 4704, 9087658, 0, 4704, 9087658, 0, 4704, 9087658, 0, 4704, 9087658, 0, 4704, 9087658, 0, 4704, 9087658, 0, 4704, 9087658, 0, 4704, 9087658, 0, 4704, 9087658, 0, 4704, 9087658, 0, 4720, 9087658, 0, 4720, 9087658, 0, 4720, 9087658, 0, 4720, 9087658, 0, 4720, 9087658, 0, 4720, 9087658, 0, 4720, 9087658, 0, 4720, 9087658, 0, 4720, 9087658, 0, 4720, 9087658, 0, 4720, 9087658, 0, 5264, 0, 2852126720, 5264, 0, 2852126720, 5264, 0, 2852126720, 5264, 0, 2852126720, 5280, 0, 2852126720, 5280, 0, 2852126720, 5280, 0, 2852126720, 5280, 0, 2852126720, 5296, 0, 2852126720, 5296, 0, 2852126720, 5296, 0, 2852126720, 5296, 0, 2852126720, 7828, 262144, 73728, 7828, 262144, 73728, 7828, 262144, 73728, 7844, 262144, 73728, 7844, 262144, 73728, 7844, 262144, 73728, 7860, 262144, 73728, 7860, 262144, 73728, 7860, 262144, 73728, 9536, 272696336, 68174084, 9536, 272696336, 68174084, 9536, 272696336, 68174084, 9536, 272696336, 68174084, 9536, 272696336, 68174084, 9536, 272696336, 68174084, 9536, 272696336, 
68174084, 9536, 272696336, 68174084, 9536, 272696336, 68174084, 9536, 272696336, 68174084, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513, 9856, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756422057647177009_53_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756422057647177009_53_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..948376be --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756422057647177009_53_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,208 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 26)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 3: { + 
if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 18)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 270 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 
2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 4608, 2863311530, 2863311530, 5248, 73, 0, 5248, 73, 0, 5248, 73, 0, 7440, 2, 0, 7456, 2, 0, 9040, 0, 536870912, 9056, 0, 536870912, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 9536, 546457892, 1227133513, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 
1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765, 10112, 1364546901, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756422083452425451_55_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756422083452425451_55_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8bed2285 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756422083452425451_55_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2752, 255, 0, 2752, 255, 0, 2752, 255, 0, 2752, 255, 0, 2752, 255, 0, 2752, 255, 0, 2752, 255, 0, 2752, 255, 0, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2496, 0, 4294966272, 2240, 147456, 0, 2240, 147456, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756422083953514683_56_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756422083953514683_56_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ce77c0cf --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756422083953514683_56_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,188 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 36)) { + if ((WaveGetLaneIndex() == 42)) { + result = 
(result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint 
counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((99 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((118 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 32)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 7556, 8388608, 0, 7560, 8388608, 0, 7572, 8388608, 0, 7576, 8388608, 0, 7588, 
8388608, 0, 7592, 8388608, 0, 8000, 0, 1227133513, 8000, 0, 1227133513, 8000, 0, 1227133513, 8000, 0, 1227133513, 8000, 0, 1227133513, 8000, 0, 1227133513, 8000, 0, 1227133513, 8000, 0, 1227133513, 8000, 0, 1227133513, 8000, 0, 1227133513, 8000, 0, 1227133513, 8016, 0, 1227133513, 8016, 0, 1227133513, 8016, 0, 1227133513, 8016, 0, 1227133513, 8016, 0, 1227133513, 8016, 0, 1227133513, 8016, 0, 1227133513, 8016, 0, 1227133513, 8016, 0, 1227133513, 8016, 0, 1227133513, 8016, 0, 1227133513, 8032, 0, 1227133513, 8032, 0, 1227133513, 8032, 0, 1227133513, 8032, 0, 1227133513, 8032, 0, 1227133513, 8032, 0, 1227133513, 8032, 0, 1227133513, 8032, 0, 1227133513, 8032, 0, 1227133513, 8032, 0, 1227133513, 8032, 0, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756422087234478889_57_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756422087234478889_57_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..eab866f6 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756422087234478889_57_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,190 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 42))) { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 20) || 
(WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 195 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 
1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 2368, 85, 0, 2368, 85, 0, 2368, 85, 0, 2368, 85, 0, 3008, 8, 0, 4736, 2080, 136347648, 4736, 2080, 136347648, 4736, 2080, 136347648, 4736, 2080, 136347648, 4736, 2080, 136347648] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756422141842486630_59_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756422141842486630_59_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c1876a96 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756422141842486630_59_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,476 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 23)) { + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((WaveGetLaneIndex() < 28)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 44))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 36)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 53)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 62)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 36)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((280 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + break; + } + case 3: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 63))) { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((314 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((330 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 24)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((337 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((348 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (374 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (381 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() >= 50)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((395 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((411 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((420 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (427 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (431 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (441 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (450 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (455 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + 
+#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 489 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2176, 8, 0, 3072, 133152, 0, 3072, 133152, 0, 3072, 133152, 0, 3648, 2796202, 0, 3648, 2796202, 0, 3648, 2796202, 0, 3648, 2796202, 0, 3648, 2796202, 0, 3648, 2796202, 0, 3648, 2796202, 0, 3648, 2796202, 0, 3648, 2796202, 0, 3648, 2796202, 0, 3648, 2796202, 0, 5760, 0, 2684354560, 5760, 0, 2684354560, 5776, 0, 2684354560, 5776, 0, 2684354560, 5792, 0, 2684354560, 5792, 0, 2684354560, 6208, 0, 2863311520, 6208, 0, 2863311520, 6208, 0, 2863311520, 6208, 0, 2863311520, 6208, 0, 2863311520, 6208, 0, 2863311520, 6208, 0, 2863311520, 6208, 0, 2863311520, 6208, 0, 2863311520, 6208, 0, 2863311520, 6208, 0, 2863311520, 6208, 0, 2863311520, 6208, 0, 2863311520, 6208, 0, 2863311520, 6224, 0, 2863311520, 6224, 0, 2863311520, 6224, 0, 2863311520, 6224, 0, 2863311520, 6224, 0, 2863311520, 6224, 0, 2863311520, 6224, 0, 2863311520, 6224, 0, 2863311520, 6224, 0, 2863311520, 6224, 0, 2863311520, 6224, 0, 2863311520, 6224, 0, 2863311520, 6224, 0, 2863311520, 6224, 0, 2863311520, 6240, 0, 2863311520, 6240, 0, 2863311520, 6240, 0, 2863311520, 6240, 0, 2863311520, 6240, 0, 2863311520, 6240, 0, 2863311520, 6240, 0, 2863311520, 6240, 0, 2863311520, 6240, 0, 2863311520, 6240, 0, 2863311520, 6240, 0, 2863311520, 6240, 0, 2863311520, 6240, 0, 2863311520, 6240, 0, 2863311520, 7168, 0, 2097152, 7808, 17, 0, 7808, 17, 0, 12544, 0, 4194304, 12560, 0, 4194304, 13120, 4195328, 67125252, 13120, 4195328, 67125252, 13120, 4195328, 67125252, 13120, 4195328, 67125252, 13120, 4195328, 67125252, 20096, 0, 2147483648, 20112, 0, 2147483648, 22272, 0, 2147483648, 22288, 0, 2147483648, 24384, 8388608, 0, 25296, 0, 143130624, 25296, 0, 143130624, 25296, 0, 143130624, 25312, 0, 
143130624, 25312, 0, 143130624, 25312, 0, 143130624, 26896, 2290649224, 143165576, 26896, 2290649224, 143165576, 26896, 2290649224, 143165576, 26896, 2290649224, 143165576, 26896, 2290649224, 143165576, 26896, 2290649224, 143165576, 26896, 2290649224, 143165576, 26896, 2290649224, 143165576, 26896, 2290649224, 143165576, 26896, 2290649224, 143165576, 26896, 2290649224, 143165576, 26896, 2290649224, 143165576, 26896, 2290649224, 143165576, 26896, 2290649224, 143165576, 26896, 2290649224, 143165576, 26912, 2290649224, 143165576, 26912, 2290649224, 143165576, 26912, 2290649224, 143165576, 26912, 2290649224, 143165576, 26912, 2290649224, 143165576, 26912, 2290649224, 143165576, 26912, 2290649224, 143165576, 26912, 2290649224, 143165576, 26912, 2290649224, 143165576, 26912, 2290649224, 143165576, 26912, 2290649224, 143165576, 26912, 2290649224, 143165576, 26912, 2290649224, 143165576, 26912, 2290649224, 143165576, 26912, 2290649224, 143165576, 28224, 73, 0, 28224, 73, 0, 28224, 73, 0, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 28800, 1363481681, 340870420, 29120, 613566756, 1227133513, 29120, 613566756, 1227133513, 29120, 613566756, 1227133513, 29120, 613566756, 1227133513, 29120, 613566756, 1227133513, 29120, 613566756, 1227133513, 29120, 613566756, 1227133513, 29120, 613566756, 1227133513, 29120, 613566756, 1227133513, 29120, 613566756, 1227133513, 29120, 613566756, 1227133513, 29120, 613566756, 1227133513, 29120, 
613566756, 1227133513, 29120, 613566756, 1227133513, 29120, 613566756, 1227133513, 29120, 613566756, 1227133513, 29120, 613566756, 1227133513, 29120, 613566756, 1227133513, 29120, 613566756, 1227133513, 29120, 613566756, 1227133513, 29120, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756422248835006743_60_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756422248835006743_60_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bd15b4cd --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756422248835006743_60_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,232 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 57))) { + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 50)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 20)) { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 16))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 24)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 57)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((156 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + if ((WaveGetLaneIndex() < 21)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((166 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 43)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((191 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 729 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3648, 262144, 262144, 3648, 262144, 262144, 5056, 0, 4260888576, 5056, 0, 4260888576, 5056, 0, 4260888576, 5056, 0, 4260888576, 5056, 0, 4260888576, 5056, 0, 4260888576, 5056, 0, 4260888576, 5056, 0, 4260888576, 5056, 0, 4260888576, 5056, 0, 4260888576, 5056, 0, 4260888576, 5056, 0, 4260888576, 5072, 0, 4260888576, 5072, 0, 4260888576, 5072, 0, 4260888576, 5072, 0, 4260888576, 5072, 0, 4260888576, 5072, 0, 4260888576, 5072, 0, 4260888576, 
5072, 0, 4260888576, 5072, 0, 4260888576, 5072, 0, 4260888576, 5072, 0, 4260888576, 5072, 0, 4260888576, 5088, 0, 4260888576, 5088, 0, 4260888576, 5088, 0, 4260888576, 5088, 0, 4260888576, 5088, 0, 4260888576, 5088, 0, 4260888576, 5088, 0, 4260888576, 5088, 0, 4260888576, 5088, 0, 4260888576, 5088, 0, 4260888576, 5088, 0, 4260888576, 5088, 0, 4260888576, 5696, 511, 0, 5696, 511, 0, 5696, 511, 0, 5696, 511, 0, 5696, 511, 0, 5696, 511, 0, 5696, 511, 0, 5696, 511, 0, 5696, 511, 0, 5712, 511, 0, 5712, 511, 0, 5712, 511, 0, 5712, 511, 0, 5712, 511, 0, 5712, 511, 0, 5712, 511, 0, 5712, 511, 0, 5712, 511, 0, 5728, 511, 0, 5728, 511, 0, 5728, 511, 0, 5728, 511, 0, 5728, 511, 0, 5728, 511, 0, 5728, 511, 0, 5728, 511, 0, 5728, 511, 0, 8320, 786431, 0, 8320, 786431, 0, 8320, 786431, 0, 8320, 786431, 0, 8320, 786431, 0, 8320, 786431, 0, 8320, 786431, 0, 8320, 786431, 0, 8320, 786431, 0, 8320, 786431, 0, 8320, 786431, 0, 8320, 786431, 0, 8320, 786431, 0, 8320, 786431, 0, 8320, 786431, 0, 8320, 786431, 0, 8320, 786431, 0, 8320, 786431, 0, 8320, 786431, 0, 8336, 786431, 0, 8336, 786431, 0, 8336, 786431, 0, 8336, 786431, 0, 8336, 786431, 0, 8336, 786431, 0, 8336, 786431, 0, 8336, 786431, 0, 8336, 786431, 0, 8336, 786431, 0, 8336, 786431, 0, 8336, 786431, 0, 8336, 786431, 0, 8336, 786431, 0, 8336, 786431, 0, 8336, 786431, 0, 8336, 786431, 0, 8336, 786431, 0, 8336, 786431, 0, 8352, 786431, 0, 8352, 786431, 0, 8352, 786431, 0, 8352, 786431, 0, 8352, 786431, 0, 8352, 786431, 0, 8352, 786431, 0, 8352, 786431, 0, 8352, 786431, 0, 8352, 786431, 0, 8352, 786431, 0, 8352, 786431, 0, 8352, 786431, 0, 8352, 786431, 0, 8352, 786431, 0, 8352, 786431, 0, 8352, 786431, 0, 8352, 786431, 0, 8352, 786431, 0, 8768, 0, 4227858432, 8768, 0, 4227858432, 8768, 0, 4227858432, 8768, 0, 4227858432, 8768, 0, 4227858432, 8768, 0, 4227858432, 8784, 0, 4227858432, 8784, 0, 4227858432, 8784, 0, 4227858432, 8784, 0, 4227858432, 8784, 0, 4227858432, 8784, 0, 4227858432, 8800, 0, 4227858432, 8800, 0, 4227858432, 
8800, 0, 4227858432, 8800, 0, 4227858432, 8800, 0, 4227858432, 8800, 0, 4227858432, 9984, 0, 4227858432, 9984, 0, 4227858432, 9984, 0, 4227858432, 9984, 0, 4227858432, 9984, 0, 4227858432, 9984, 0, 4227858432, 9988, 0, 4227858432, 9988, 0, 4227858432, 9988, 0, 4227858432, 9988, 0, 4227858432, 9988, 0, 4227858432, 9988, 0, 4227858432, 10000, 0, 4227858432, 10000, 0, 4227858432, 10000, 0, 4227858432, 10000, 0, 4227858432, 10000, 0, 4227858432, 10000, 0, 4227858432, 10004, 0, 4227858432, 10004, 0, 4227858432, 10004, 0, 4227858432, 10004, 0, 4227858432, 10004, 0, 4227858432, 10004, 0, 4227858432, 10016, 0, 4227858432, 10016, 0, 4227858432, 10016, 0, 4227858432, 10016, 0, 4227858432, 10016, 0, 4227858432, 10016, 0, 4227858432, 10020, 0, 4227858432, 10020, 0, 4227858432, 10020, 0, 4227858432, 10020, 0, 4227858432, 10020, 0, 4227858432, 10020, 0, 4227858432, 10624, 1835007, 0, 10624, 1835007, 0, 10624, 1835007, 0, 10624, 1835007, 0, 10624, 1835007, 0, 10624, 1835007, 0, 10624, 1835007, 0, 10624, 1835007, 0, 10624, 1835007, 0, 10624, 1835007, 0, 10624, 1835007, 0, 10624, 1835007, 0, 10624, 1835007, 0, 10624, 1835007, 0, 10624, 1835007, 0, 10624, 1835007, 0, 10624, 1835007, 0, 10624, 1835007, 0, 10624, 1835007, 0, 10624, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10640, 1835007, 0, 10656, 1835007, 0, 10656, 1835007, 0, 10656, 1835007, 0, 10656, 1835007, 0, 10656, 1835007, 0, 10656, 1835007, 0, 10656, 1835007, 0, 10656, 1835007, 0, 10656, 1835007, 0, 10656, 1835007, 0, 10656, 1835007, 0, 10656, 1835007, 0, 10656, 1835007, 0, 10656, 1835007, 0, 10656, 1835007, 0, 10656, 1835007, 0, 10656, 1835007, 0, 10656, 1835007, 0, 10656, 1835007, 0, 10656, 
1835007, 0, 11264, 85, 0, 11264, 85, 0, 11264, 85, 0, 11264, 85, 0, 12224, 0, 2048, 12240, 0, 2048, 12256, 0, 2048] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756422570178764930_61_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756422570178764930_61_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b3182e5c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756422570178764930_61_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756422570464722958_62_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756422570464722958_62_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..940f53f6 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756422570464722958_62_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,278 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 44))) { + if (((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 59)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 55))) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 43))) { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter2 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 44)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((226 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 24)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 45)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((251 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 13)) { + 
result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((263 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 22)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((279 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 33)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 57)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((293 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7808, 0, 2048, 7824, 0, 2048, 10432, 33554432, 134217728, 10432, 33554432, 134217728, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 
14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14480, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912, 14496, 127, 4294950912] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756422720339773490_64_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756422720339773490_64_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fb93476c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756422720339773490_64_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,366 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 42)) { + if ((WaveGetLaneIndex() < 19)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 44)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (((137 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 23)) { + if ((WaveGetLaneIndex() >= 53)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() >= 60)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 62)) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((291 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() < 22)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (301 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 43)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (308 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (315 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 243 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2512, 0, 4198400, 2512, 0, 4198400, 2528, 0, 4198400, 2528, 0, 4198400, 3668, 0, 2097152, 3672, 0, 2097152, 3684, 0, 2097152, 3688, 0, 2097152, 5456, 0, 8658944, 5456, 0, 8658944, 5456, 0, 8658944, 5472, 0, 8658944, 5472, 0, 8658944, 5472, 0, 8658944, 6720, 17, 0, 6720, 17, 0, 7872, 0, 33562624, 7872, 0, 33562624, 7888, 0, 33562624, 7888, 0, 33562624, 7904, 0, 33562624, 7904, 0, 33562624, 8772, 512, 0, 8788, 512, 0, 8804, 512, 0, 9408, 512, 0, 9424, 512, 0, 9440, 512, 0, 11088, 537002016, 0, 11088, 537002016, 0, 11088, 537002016, 0, 11104, 537002016, 0, 11104, 537002016, 0, 11104, 537002016, 0, 12160, 1145324612, 1145324612, 12160, 1145324612, 1145324612, 12160, 1145324612, 1145324612, 12160, 1145324612, 1145324612, 12160, 1145324612, 1145324612, 12160, 1145324612, 1145324612, 12160, 1145324612, 1145324612, 12160, 1145324612, 1145324612, 12160, 1145324612, 1145324612, 12160, 1145324612, 1145324612, 12160, 1145324612, 1145324612, 12160, 1145324612, 1145324612, 12160, 1145324612, 1145324612, 12160, 1145324612, 1145324612, 12160, 1145324612, 1145324612, 12160, 1145324612, 1145324612, 12608, 559240, 0, 12608, 559240, 0, 12608, 559240, 0, 12608, 559240, 0, 12608, 559240, 0, 13248, 17, 0, 13248, 17, 0, 14144, 1145324612, 1145324612, 14144, 1145324612, 1145324612, 14144, 1145324612, 1145324612, 14144, 1145324612, 1145324612, 14144, 1145324612, 1145324612, 14144, 1145324612, 1145324612, 14144, 1145324612, 1145324612, 14144, 1145324612, 
1145324612, 14144, 1145324612, 1145324612, 14144, 1145324612, 1145324612, 14144, 1145324612, 1145324612, 14144, 1145324612, 1145324612, 14144, 1145324612, 1145324612, 14144, 1145324612, 1145324612, 14144, 1145324612, 1145324612, 14144, 1145324612, 1145324612, 15248, 0, 2147483648, 15264, 0, 2147483648, 15280, 0, 2147483648, 15696, 0, 2147483648, 15712, 0, 2147483648, 15728, 0, 2147483648, 18624, 524288, 0, 18640, 524288, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756422889637889077_65_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756422889637889077_65_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..78d10946 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756422889637889077_65_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,118 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 49))) { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 55)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 63 + - 
Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3920, 1048576, 0, 3924, 1048576, 0, 3928, 1048576, 0, 3936, 1048576, 0, 3940, 1048576, 0, 3944, 1048576, 0, 3952, 1048576, 0, 3956, 1048576, 0, 3960, 1048576, 0, 4368, 0, 1426063360, 4368, 0, 1426063360, 4368, 0, 1426063360, 4368, 0, 1426063360, 4384, 0, 1426063360, 4384, 0, 1426063360, 4384, 0, 1426063360, 4384, 0, 1426063360, 4400, 0, 1426063360, 4400, 0, 1426063360, 4400, 0, 1426063360, 4400, 0, 1426063360] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756422896102838260_66_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756422896102838260_66_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..53c93eda --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756422896102838260_66_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756422922324197449_68_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756422922324197449_68_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e186e708 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756422922324197449_68_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 2112, 1431655765, 1431655765, 1856, 671088640, 2097280, 1856, 671088640, 2097280, 1856, 671088640, 2097280, 1856, 671088640, 2097280] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756422922604874609_69_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756422922604874609_69_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..db44c0de --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756422922604874609_69_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,140 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 61))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 165 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 
2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 832, 2863311530, 2863311530, 1472, 8, 0, 2368, 545392672, 136348168, 2368, 545392672, 136348168, 2368, 545392672, 136348168, 2368, 545392672, 136348168, 2368, 545392672, 136348168, 2368, 545392672, 136348168, 2368, 545392672, 136348168, 2368, 545392672, 136348168, 2368, 545392672, 136348168, 2368, 545392672, 136348168, 4304, 268435488, 131104, 4304, 268435488, 131104, 4304, 268435488, 131104, 4304, 268435488, 131104, 4320, 268435488, 131104, 4320, 268435488, 131104, 4320, 268435488, 131104, 4320, 268435488, 131104, 4336, 268435488, 131104, 4336, 268435488, 131104, 4336, 268435488, 131104, 4336, 268435488, 131104] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756429437873348583_72_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756429437873348583_72_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..06e57b20 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756429437873348583_72_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,319 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter0 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 52))) { + if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((182 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((201 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 45)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((278 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 4))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (341 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (346 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + 
Fill: 0 + Size: 774 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2688, 1073741824, 4194304, 2688, 1073741824, 4194304, 2432, 2, 525568, 2432, 2, 525568, 2432, 2, 525568, 2432, 2, 525568, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 2176, 524285, 4289724416, 3328, 85, 0, 3328, 85, 0, 3328, 85, 0, 3328, 85, 0, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4368, 2863311530, 2863311530, 4384, 2863311530, 
2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 4384, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5392, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 
2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 5408, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 
2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 16704, 33554432, 131072, 16704, 33554432, 131072, 16720, 33554432, 131072, 16720, 33554432, 131072, 16736, 33554432, 131072, 16736, 33554432, 131072, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513, 22464, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756430008498482418_76_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756430008498482418_76_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..26a1a690 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756430008498482418_76_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,290 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 45)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((32 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 26))) { + if ((((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((85 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 54)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((92 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((107 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 52)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + 
result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((138 << 6) | (counter0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((152 << 6) | (counter0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((161 << 6) | (counter0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((170 << 6) | (counter0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 44))) { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 876 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7760, 131072, 0, 7776, 131072, 0, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 2863311530, 2863303338, 8848, 
2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8852, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8864, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 
2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 8868, 2863311530, 2863303338, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10896, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 
10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10900, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10912, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 
1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 10916, 1431655765, 1431655765, 12176, 4369, 0, 12176, 4369, 0, 12176, 4369, 0, 12176, 4369, 0, 12192, 4369, 0, 12192, 4369, 0, 12192, 4369, 0, 12192, 4369, 0, 12208, 4369, 0, 12208, 4369, 0, 12208, 4369, 0, 12208, 4369, 0, 16128, 537002016, 2097664, 16128, 537002016, 2097664, 16128, 537002016, 2097664, 16128, 537002016, 2097664, 16128, 537002016, 2097664, 16448, 1145324612, 1145324612, 16448, 1145324612, 1145324612, 16448, 1145324612, 1145324612, 16448, 1145324612, 1145324612, 16448, 1145324612, 1145324612, 16448, 1145324612, 1145324612, 16448, 1145324612, 1145324612, 16448, 1145324612, 1145324612, 16448, 1145324612, 1145324612, 16448, 1145324612, 1145324612, 16448, 1145324612, 1145324612, 16448, 1145324612, 1145324612, 16448, 1145324612, 1145324612, 16448, 1145324612, 1145324612, 16448, 1145324612, 1145324612, 16448, 1145324612, 1145324612, 16896, 559240, 0, 16896, 559240, 0, 16896, 559240, 0, 16896, 559240, 0, 16896, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756430665147244116_78_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756430665147244116_78_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ca2a9a22 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756430665147244116_78_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,295 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((27 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 22)) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + break; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() >= 49)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((194 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 42)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (((201 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter6 == 1)) { + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 45)) { + 
result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 480 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1748, 4369, 286326784, 1748, 4369, 286326784, 1748, 4369, 286326784, 1748, 4369, 286326784, 1748, 4369, 286326784, 1748, 4369, 286326784, 1748, 4369, 286326784, 1748, 4369, 286326784, 1752, 4369, 286326784, 1752, 4369, 286326784, 1752, 4369, 286326784, 1752, 4369, 286326784, 1752, 4369, 286326784, 1752, 4369, 286326784, 1752, 4369, 286326784, 1752, 4369, 286326784, 1756, 4369, 286326784, 1756, 4369, 286326784, 1756, 4369, 286326784, 1756, 4369, 286326784, 1756, 4369, 286326784, 1756, 4369, 286326784, 1756, 4369, 286326784, 1756, 4369, 286326784, 1764, 4369, 286326784, 1764, 4369, 286326784, 1764, 4369, 286326784, 1764, 4369, 286326784, 1764, 4369, 286326784, 1764, 4369, 286326784, 1764, 4369, 286326784, 1764, 4369, 286326784, 1768, 4369, 286326784, 1768, 4369, 286326784, 1768, 4369, 286326784, 1768, 4369, 286326784, 1768, 4369, 286326784, 1768, 4369, 286326784, 1768, 4369, 286326784, 1768, 4369, 286326784, 1772, 4369, 286326784, 1772, 4369, 286326784, 1772, 4369, 286326784, 1772, 4369, 286326784, 1772, 4369, 286326784, 1772, 4369, 286326784, 1772, 4369, 286326784, 1772, 4369, 286326784, 2452, 4369, 
286326784, 2452, 4369, 286326784, 2452, 4369, 286326784, 2452, 4369, 286326784, 2452, 4369, 286326784, 2452, 4369, 286326784, 2452, 4369, 286326784, 2452, 4369, 286326784, 2456, 4369, 286326784, 2456, 4369, 286326784, 2456, 4369, 286326784, 2456, 4369, 286326784, 2456, 4369, 286326784, 2456, 4369, 286326784, 2456, 4369, 286326784, 2456, 4369, 286326784, 2460, 4369, 286326784, 2460, 4369, 286326784, 2460, 4369, 286326784, 2460, 4369, 286326784, 2460, 4369, 286326784, 2460, 4369, 286326784, 2460, 4369, 286326784, 2460, 4369, 286326784, 2468, 4369, 286326784, 2468, 4369, 286326784, 2468, 4369, 286326784, 2468, 4369, 286326784, 2468, 4369, 286326784, 2468, 4369, 286326784, 2468, 4369, 286326784, 2468, 4369, 286326784, 2472, 4369, 286326784, 2472, 4369, 286326784, 2472, 4369, 286326784, 2472, 4369, 286326784, 2472, 4369, 286326784, 2472, 4369, 286326784, 2472, 4369, 286326784, 2472, 4369, 286326784, 2476, 4369, 286326784, 2476, 4369, 286326784, 2476, 4369, 286326784, 2476, 4369, 286326784, 2476, 4369, 286326784, 2476, 4369, 286326784, 2476, 4369, 286326784, 2476, 4369, 286326784, 5264, 131104, 2097152, 5264, 131104, 2097152, 5264, 131104, 2097152, 7424, 4194304, 1073741824, 7424, 4194304, 1073741824, 7440, 4194304, 1073741824, 7440, 4194304, 1073741824, 7456, 4194304, 1073741824, 7456, 4194304, 1073741824, 9348, 1073758208, 16384, 9348, 1073758208, 16384, 9348, 1073758208, 16384, 9352, 1073758208, 16384, 9352, 1073758208, 16384, 9352, 1073758208, 16384, 9364, 1073758208, 16384, 9364, 1073758208, 16384, 9364, 1073758208, 16384, 9368, 1073758208, 16384, 9368, 1073758208, 16384, 9368, 1073758208, 16384, 9380, 1073758208, 16384, 9380, 1073758208, 16384, 9380, 1073758208, 16384, 9384, 1073758208, 16384, 9384, 1073758208, 16384, 9384, 1073758208, 16384, 10560, 16384, 0, 10576, 16384, 0, 10592, 16384, 0, 11520, 0, 2290614272, 11520, 0, 2290614272, 11520, 0, 2290614272, 11520, 0, 2290614272, 11536, 0, 2290614272, 11536, 0, 2290614272, 11536, 0, 2290614272, 11536, 0, 2290614272, 
13952, 85, 0, 13952, 85, 0, 13952, 85, 0, 13952, 85, 0, 16768, 1024, 16781312, 16768, 1024, 16781312, 16768, 1024, 16781312, 16384, 0, 4278181888, 16384, 0, 4278181888, 16384, 0, 4278181888, 16384, 0, 4278181888, 16384, 0, 4278181888, 16384, 0, 4278181888, 16384, 0, 4278181888, 16384, 0, 4278181888, 16384, 0, 4278181888, 16384, 0, 4278181888, 16384, 0, 4278181888, 16384, 0, 4278181888, 16384, 0, 4278181888, 16384, 0, 4278181888, 16384, 0, 4278181888, 16384, 0, 4278181888, 16384, 0, 4278181888, 16384, 0, 4278181888, 16128, 4096, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756430829166172300_79_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756430829166172300_79_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ddc89b29 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756430829166172300_79_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,245 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 59))) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 47))) { + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((92 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((counter0 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); 
+ if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((154 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((173 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((178 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((185 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((189 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((204 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + if ((counter2 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 105 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 273, 268435456, 1280, 273, 268435456, 1280, 273, 268435456, 1280, 273, 268435456, 5908, 69632, 17891328, 5908, 69632, 17891328, 5908, 69632, 17891328, 5908, 69632, 17891328, 5908, 69632, 17891328, 5912, 69632, 17891328, 5912, 69632, 17891328, 5912, 69632, 17891328, 5912, 69632, 17891328, 5912, 69632, 17891328, 7872, 1145324612, 1145324612, 7872, 1145324612, 1145324612, 7872, 1145324612, 1145324612, 7872, 1145324612, 1145324612, 7872, 1145324612, 1145324612, 7872, 1145324612, 1145324612, 7872, 1145324612, 1145324612, 7872, 1145324612, 1145324612, 7872, 1145324612, 1145324612, 7872, 1145324612, 1145324612, 7872, 1145324612, 1145324612, 7872, 1145324612, 1145324612, 7872, 1145324612, 1145324612, 7872, 1145324612, 1145324612, 7872, 1145324612, 1145324612, 7872, 1145324612, 1145324612, 11860, 559240, 0, 11860, 559240, 0, 11860, 559240, 0, 11860, 559240, 0, 11860, 559240, 0] + - Name: _wave_op_index + 
Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756430837025984724_80_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756430837025984724_80_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e0acc24e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756430837025984724_80_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,72 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 62))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1552, 1, 3221225472, 1552, 1, 3221225472, 1552, 1, 3221225472, 1568, 1, 3221225472, 1568, 1, 3221225472, 1568, 1, 3221225472, 1584, 1, 3221225472, 1584, 1, 3221225472, 1584, 1, 3221225472] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756430922534640566_82_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756430922534640566_82_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..49578a41 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756430922534640566_82_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,79 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1360, 33554944, 524288, 1360, 33554944, 524288, 1360, 33554944, 524288, 1376, 33554944, 524288, 1376, 33554944, 524288, 1376, 33554944, 524288, 2448, 16777218, 16384, 2448, 16777218, 16384, 2448, 16777218, 16384, 2464, 16777218, 16384, 2464, 16777218, 16384, 2464, 16777218, 16384] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756430923621105464_83_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756430923621105464_83_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7fd4cae6 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756430923621105464_83_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,236 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 11)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 39)) || 
(WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 58))) { + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 61)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 6))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 54))) { + if (((WaveGetLaneIndex() == 8) || 
(WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 63))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((282 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1472, 0, 1048576, 1488, 0, 1048576, 1504, 0, 1048576, 2112, 273, 0, 2112, 273, 0, 2112, 273, 0, 2128, 
273, 0, 2128, 273, 0, 2128, 273, 0, 2144, 273, 0, 2144, 273, 0, 2144, 273, 0, 3520, 0, 1048576, 3536, 0, 1048576, 3552, 0, 1048576] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756430929224422402_84_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756430929224422402_84_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..99684e80 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756430929224422402_84_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,164 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 52))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 24)) || 
(WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 51 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4544, 32768, 0, 4560, 32768, 0, 4576, 32768, 0, 5568, 666282, 2147483648, 5568, 666282, 2147483648, 5568, 666282, 2147483648, 5568, 666282, 2147483648, 5568, 666282, 2147483648, 5568, 666282, 2147483648, 5568, 666282, 2147483648, 5568, 666282, 2147483648, 5568, 666282, 2147483648, 5568, 666282, 2147483648] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756430929926120644_85_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756430929926120644_85_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3067aff5 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756430929926120644_85_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,197 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 61))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 26)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
break; + } + case 1: { + if ((WaveGetLaneIndex() >= 45)) { + if ((WaveGetLaneIndex() < 21)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 63)) { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 48)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 53)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 273, 0, 1280, 273, 0, 1280, 273, 0, 2368, 273, 0, 2368, 273, 0, 2368, 273, 0, 2384, 273, 0, 2384, 273, 0, 2384, 273, 0, 2400, 273, 0, 2400, 273, 0, 2400, 273, 0, 3264, 17, 0, 3264, 17, 0, 4992, 0, 572653568, 4992, 0, 572653568, 4992, 0, 572653568, 4992, 0, 572653568, 5696, 546, 0, 5696, 546, 0, 5696, 546, 0, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 
1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 8064, 559240, 0, 8064, 559240, 0, 8064, 559240, 0, 8064, 559240, 0, 8064, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756430932040722888_86_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756430932040722888_86_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2f94178b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756430932040722888_86_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,189 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while 
((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 22)) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 50)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 63))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 57 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 73, 0, 768, 73, 0, 768, 73, 0, 4224, 272696336, 68174084, 4224, 272696336, 68174084, 4224, 272696336, 68174084, 4224, 272696336, 68174084, 4224, 272696336, 68174084, 4224, 
272696336, 68174084, 4224, 272696336, 68174084, 4224, 272696336, 68174084, 4224, 272696336, 68174084, 4224, 272696336, 68174084, 7232, 2340, 0, 7232, 2340, 0, 7232, 2340, 0, 7232, 2340, 0, 8384, 36, 0, 8384, 36, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756431042304582123_88_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756431042304582123_88_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..34cb375b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756431042304582123_88_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,293 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) 
== 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 25)) { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } else { + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 56))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1856, 272696336, 68174084, 1856, 272696336, 68174084, 1856, 272696336, 68174084, 1856, 272696336, 68174084, 1856, 272696336, 68174084, 1856, 272696336, 68174084, 1856, 272696336, 68174084, 1856, 272696336, 68174084, 1856, 272696336, 68174084, 1856, 272696336, 68174084, 1872, 272696336, 68174084, 1872, 272696336, 68174084, 1872, 272696336, 68174084, 1872, 272696336, 68174084, 1872, 272696336, 68174084, 1872, 272696336, 68174084, 1872, 272696336, 68174084, 1872, 272696336, 68174084, 1872, 272696336, 68174084, 1872, 272696336, 68174084, 2176, 68174084, 1090785345, 2176, 68174084, 1090785345, 2176, 68174084, 1090785345, 2176, 68174084, 1090785345, 2176, 68174084, 1090785345, 2176, 68174084, 1090785345, 2176, 68174084, 1090785345, 2176, 68174084, 1090785345, 2176, 68174084, 1090785345, 2176, 68174084, 1090785345, 2176, 68174084, 1090785345, 3856, 545259520, 0, 3856, 545259520, 0, 7504, 2, 2, 7504, 2, 2, 8896, 17, 0, 8896, 17, 0, 9792, 
1145324612, 1145324612, 9792, 1145324612, 1145324612, 9792, 1145324612, 1145324612, 9792, 1145324612, 1145324612, 9792, 1145324612, 1145324612, 9792, 1145324612, 1145324612, 9792, 1145324612, 1145324612, 9792, 1145324612, 1145324612, 9792, 1145324612, 1145324612, 9792, 1145324612, 1145324612, 9792, 1145324612, 1145324612, 9792, 1145324612, 1145324612, 9792, 1145324612, 1145324612, 9792, 1145324612, 1145324612, 9792, 1145324612, 1145324612, 9792, 1145324612, 1145324612, 12480, 32768, 8912896, 12480, 32768, 8912896, 12480, 32768, 8912896, 13120, 8, 0, 14272, 0, 536870912, 14592, 545392672, 136348168, 14592, 545392672, 136348168, 14592, 545392672, 136348168, 14592, 545392672, 136348168, 14592, 545392672, 136348168, 14592, 545392672, 136348168, 14592, 545392672, 136348168, 14592, 545392672, 136348168, 14592, 545392672, 136348168, 14592, 545392672, 136348168] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756431851452013085_90_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756431851452013085_90_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3fcea24b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756431851452013085_90_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,131 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((23 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 59))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 
47))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((77 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((96 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1492, 85, 0, 1492, 85, 0, 1492, 85, 0, 1492, 85, 0, 1496, 85, 0, 1496, 85, 0, 1496, 85, 0, 1496, 85, 0, 1508, 85, 0, 1508, 85, 0, 1508, 85, 0, 1508, 85, 0, 1512, 85, 0, 1512, 85, 0, 1512, 85, 0, 1512, 85, 0, 4948, 8192, 0, 4952, 8192, 0, 4964, 
8192, 0, 4968, 8192, 0, 6164, 134218240, 2048, 6164, 134218240, 2048, 6164, 134218240, 2048, 6168, 134218240, 2048, 6168, 134218240, 2048, 6168, 134218240, 2048, 6180, 134218240, 2048, 6180, 134218240, 2048, 6180, 134218240, 2048, 6184, 134218240, 2048, 6184, 134218240, 2048, 6184, 134218240, 2048] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756431875686141689_91_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756431875686141689_91_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..801b1141 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756431875686141689_91_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,246 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { 
+ if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((82 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 56))) { + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 32))) { + if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 46)) { + result 
= (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 49))) { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 240 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1792, 9362, 613416960, 1792, 9362, 613416960, 1792, 9362, 613416960, 1792, 9362, 613416960, 1792, 9362, 613416960, 1792, 9362, 613416960, 1792, 9362, 613416960, 1792, 9362, 613416960, 1792, 9362, 613416960, 1808, 9362, 613416960, 1808, 9362, 613416960, 1808, 9362, 613416960, 1808, 9362, 613416960, 1808, 9362, 613416960, 1808, 9362, 613416960, 1808, 9362, 613416960, 1808, 9362, 613416960, 1808, 9362, 613416960, 1824, 9362, 613416960, 1824, 9362, 613416960, 1824, 9362, 613416960, 1824, 9362, 613416960, 1824, 9362, 613416960, 1824, 9362, 613416960, 1824, 9362, 613416960, 1824, 9362, 613416960, 1824, 9362, 613416960, 4032, 2, 0, 4036, 2, 0, 4048, 2, 0, 4052, 2, 0, 4064, 2, 0, 4068, 2, 0, 5248, 65538, 16384, 5248, 65538, 16384, 5248, 65538, 16384, 5252, 65538, 16384, 5252, 65538, 16384, 5252, 65538, 16384, 5264, 65538, 16384, 5264, 65538, 16384, 5264, 65538, 16384, 5268, 65538, 16384, 5268, 65538, 16384, 5268, 65538, 16384, 5280, 65538, 16384, 5280, 65538, 16384, 5280, 65538, 16384, 5284, 65538, 16384, 5284, 65538, 16384, 5284, 65538, 16384, 12608, 0, 16777216, 12624, 0, 16777216, 12640, 
0, 16777216, 13056, 0, 16777216, 13072, 0, 16777216, 13088, 0, 16777216, 14528, 0, 16777216, 15104, 545392672, 136348168, 15104, 545392672, 136348168, 15104, 545392672, 136348168, 15104, 545392672, 136348168, 15104, 545392672, 136348168, 15104, 545392672, 136348168, 15104, 545392672, 136348168, 15104, 545392672, 136348168, 15104, 545392672, 136348168, 15104, 545392672, 136348168, 18112, 1065220, 1074008129, 18112, 1065220, 1074008129, 18112, 1065220, 1074008129, 18112, 1065220, 1074008129, 18112, 1065220, 1074008129, 18112, 1065220, 1074008129, 18112, 1065220, 1074008129, 18112, 1065220, 1074008129, 18112, 1065220, 1074008129] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756431916255203515_92_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756431916255203515_92_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..40ae998b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756431916255203515_92_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,139 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 46))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 36)) || 
(WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 43))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4944, 0, 16, 4960, 0, 16, 7312, 268451840, 256, 7312, 268451840, 256, 7312, 268451840, 256, 7328, 268451840, 256, 7328, 268451840, 256, 
7328, 268451840, 256] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756431917394206825_93_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756431917394206825_93_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..73b80d7d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756431917394206825_93_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 59))) { + if 
((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1488, 1024, 524320, 1488, 1024, 524320, 1488, 1024, 524320, 1504, 1024, 524320, 1504, 1024, 524320, 1504, 1024, 524320] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756431920358130450_95_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756431920358130450_95_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..53c93eda --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756431920358130450_95_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 
+ - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756431920674831321_96_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756431920674831321_96_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4f5cc53b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756431920674831321_96_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,227 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 45))) { + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 37)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 58))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 3: { + if ((WaveGetLaneIndex() >= 61)) { + if ((WaveGetLaneIndex() >= 43)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + 
result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 465 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4288, 286331153, 286331153, 4288, 286331153, 286331153, 4288, 286331153, 286331153, 4288, 286331153, 286331153, 4288, 286331153, 286331153, 4288, 286331153, 286331153, 4288, 286331153, 286331153, 4288, 286331153, 286331153, 4288, 286331153, 286331153, 4288, 286331153, 286331153, 4288, 286331153, 286331153, 4288, 286331153, 286331153, 4288, 286331153, 286331153, 4288, 286331153, 286331153, 4288, 286331153, 286331153, 4288, 286331153, 286331153, 5456, 1, 2004316160, 5456, 1, 2004316160, 5456, 1, 2004316160, 5456, 1, 2004316160, 5456, 1, 2004316160, 5456, 1, 2004316160, 5456, 1, 2004316160, 5456, 1, 2004316160, 5456, 1, 2004316160, 5456, 1, 2004316160, 5456, 1, 2004316160, 5456, 1, 2004316160, 5456, 1, 2004316160, 5456, 1, 2004316160, 5456, 1, 2004316160, 5456, 1, 2004316160, 5472, 1, 2004316160, 5472, 1, 2004316160, 5472, 1, 2004316160, 5472, 1, 2004316160, 5472, 1, 2004316160, 5472, 1, 2004316160, 5472, 1, 2004316160, 5472, 1, 2004316160, 5472, 1, 2004316160, 5472, 1, 2004316160, 5472, 1, 2004316160, 5472, 1, 2004316160, 5472, 1, 2004316160, 5472, 1, 2004316160, 5472, 1, 2004316160, 5472, 1, 2004316160, 5488, 1, 2004316160, 5488, 1, 2004316160, 5488, 1, 2004316160, 5488, 1, 2004316160, 5488, 1, 2004316160, 5488, 1, 2004316160, 5488, 1, 2004316160, 5488, 1, 2004316160, 5488, 1, 2004316160, 5488, 1, 2004316160, 5488, 1, 2004316160, 5488, 1, 2004316160, 5488, 1, 2004316160, 5488, 1, 2004316160, 5488, 1, 
2004316160, 5488, 1, 2004316160, 6608, 7, 1946157056, 6608, 7, 1946157056, 6608, 7, 1946157056, 6608, 7, 1946157056, 6608, 7, 1946157056, 6608, 7, 1946157056, 6608, 7, 1946157056, 6624, 7, 1946157056, 6624, 7, 1946157056, 6624, 7, 1946157056, 6624, 7, 1946157056, 6624, 7, 1946157056, 6624, 7, 1946157056, 6624, 7, 1946157056, 6640, 7, 1946157056, 6640, 7, 1946157056, 6640, 7, 1946157056, 6640, 7, 1946157056, 6640, 7, 1946157056, 6640, 7, 1946157056, 6640, 7, 1946157056, 7184, 572662272, 35791394, 7184, 572662272, 35791394, 7184, 572662272, 35791394, 7184, 572662272, 35791394, 7184, 572662272, 35791394, 7184, 572662272, 35791394, 7184, 572662272, 35791394, 7184, 572662272, 35791394, 7184, 572662272, 35791394, 7184, 572662272, 35791394, 7184, 572662272, 35791394, 7184, 572662272, 35791394, 7184, 572662272, 35791394, 7200, 572662272, 35791394, 7200, 572662272, 35791394, 7200, 572662272, 35791394, 7200, 572662272, 35791394, 7200, 572662272, 35791394, 7200, 572662272, 35791394, 7200, 572662272, 35791394, 7200, 572662272, 35791394, 7200, 572662272, 35791394, 7200, 572662272, 35791394, 7200, 572662272, 35791394, 7200, 572662272, 35791394, 7200, 572662272, 35791394, 7216, 572662272, 35791394, 7216, 572662272, 35791394, 7216, 572662272, 35791394, 7216, 572662272, 35791394, 7216, 572662272, 35791394, 7216, 572662272, 35791394, 7216, 572662272, 35791394, 7216, 572662272, 35791394, 7216, 572662272, 35791394, 7216, 572662272, 35791394, 7216, 572662272, 35791394, 7216, 572662272, 35791394, 7216, 572662272, 35791394, 7808, 0, 3758096384, 7808, 0, 3758096384, 7808, 0, 3758096384, 8848, 0, 1073741824, 8864, 0, 1073741824, 8880, 0, 1073741824, 9728, 524288, 0, 10896, 8191, 532676608, 10896, 8191, 532676608, 10896, 8191, 532676608, 10896, 8191, 532676608, 10896, 8191, 532676608, 10896, 8191, 532676608, 10896, 8191, 532676608, 10896, 8191, 532676608, 10896, 8191, 532676608, 10896, 8191, 532676608, 10896, 8191, 532676608, 10896, 8191, 532676608, 10896, 8191, 532676608, 10896, 8191, 
532676608, 10896, 8191, 532676608, 10896, 8191, 532676608, 10896, 8191, 532676608, 10896, 8191, 532676608, 10896, 8191, 532676608, 10896, 8191, 532676608, 11712, 85, 0, 11712, 85, 0, 11712, 85, 0, 11712, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756432194172636004_98_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756432194172636004_98_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5bf7313f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756432194172636004_98_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,141 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 40)) { + if ((WaveGetLaneIndex() >= 33)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 41)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- 
+Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 306 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 2512, 2290649224, 2290649224, 2512, 2290649224, 2290649224, 2512, 2290649224, 2290649224, 2512, 2290649224, 2290649224, 2512, 2290649224, 2290649224, 2512, 2290649224, 2290649224, 2512, 2290649224, 2290649224, 2512, 2290649224, 2290649224, 2512, 2290649224, 2290649224, 2512, 2290649224, 2290649224, 2512, 2290649224, 2290649224, 2512, 2290649224, 2290649224, 2512, 2290649224, 2290649224, 2512, 2290649224, 2290649224, 2512, 2290649224, 2290649224, 2512, 2290649224, 2290649224, 2528, 2290649224, 2290649224, 2528, 2290649224, 2290649224, 2528, 2290649224, 2290649224, 2528, 2290649224, 2290649224, 2528, 2290649224, 2290649224, 2528, 2290649224, 2290649224, 2528, 2290649224, 2290649224, 2528, 2290649224, 2290649224, 2528, 2290649224, 2290649224, 2528, 2290649224, 2290649224, 2528, 2290649224, 2290649224, 2528, 2290649224, 2290649224, 2528, 2290649224, 2290649224, 2528, 2290649224, 2290649224, 2528, 2290649224, 2290649224, 2528, 2290649224, 2290649224, 2544, 2290649224, 2290649224, 2544, 2290649224, 2290649224, 2544, 2290649224, 2290649224, 2544, 2290649224, 2290649224, 2544, 2290649224, 2290649224, 2544, 2290649224, 2290649224, 2544, 2290649224, 2290649224, 2544, 2290649224, 2290649224, 2544, 2290649224, 2290649224, 2544, 
2290649224, 2290649224, 2544, 2290649224, 2290649224, 2544, 2290649224, 2290649224, 2544, 2290649224, 2290649224, 2544, 2290649224, 2290649224, 2544, 2290649224, 2290649224, 2544, 2290649224, 2290649224, 3152, 0, 2290649088, 3152, 0, 2290649088, 3152, 0, 2290649088, 3152, 0, 2290649088, 3152, 0, 2290649088, 3152, 0, 2290649088, 3168, 0, 2290649088, 3168, 0, 2290649088, 3168, 0, 2290649088, 3168, 0, 2290649088, 3168, 0, 2290649088, 3168, 0, 2290649088, 3184, 0, 2290649088, 3184, 0, 2290649088, 3184, 0, 2290649088, 3184, 0, 2290649088, 3184, 0, 2290649088, 3184, 0, 2290649088, 3728, 0, 2290649088, 3728, 0, 2290649088, 3728, 0, 2290649088, 3728, 0, 2290649088, 3728, 0, 2290649088, 3728, 0, 2290649088, 3744, 0, 2290649088, 3744, 0, 2290649088, 3744, 0, 2290649088, 3744, 0, 2290649088, 3744, 0, 2290649088, 3744, 0, 2290649088, 3760, 0, 2290649088, 3760, 0, 2290649088, 3760, 0, 2290649088, 3760, 0, 2290649088, 3760, 0, 2290649088, 3760, 0, 2290649088] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756432195831527026_99_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756432195831527026_99_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..409cc213 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756432195831527026_99_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,79 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 576 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 
1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1104, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1120, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 
1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1136, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 
1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765, 1712, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: 
_wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756432198764075143_100_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756432198764075143_100_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2dfe9759 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756432198764075143_100_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,548 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 61)) { + if ((WaveGetLaneIndex() < 17)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 43))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 45))) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 35))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } else { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 46)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((265 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (290 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((321 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((344 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (353 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (364 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 47))) { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (392 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 32))) { + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (418 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (435 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (446 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (450 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (467 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (478 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 50))) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((504 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((515 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((526 << 6) | (i4 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((533 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (543 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (552 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((579 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((589 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((598 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((609 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((620 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (624 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 768 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3200, 1, 0, 4096, 33562626, 537002016, 4096, 33562626, 537002016, 4096, 33562626, 537002016, 4096, 33562626, 537002016, 4096, 33562626, 537002016, 4096, 33562626, 537002016, 4800, 33562626, 537002016, 4800, 33562626, 537002016, 
4800, 33562626, 537002016, 4800, 33562626, 537002016, 4800, 33562626, 537002016, 4800, 33562626, 537002016, 6656, 0, 18874368, 6656, 0, 18874368, 7936, 1145324612, 1145324612, 7936, 1145324612, 1145324612, 7936, 1145324612, 1145324612, 7936, 1145324612, 1145324612, 7936, 1145324612, 1145324612, 7936, 1145324612, 1145324612, 7936, 1145324612, 1145324612, 7936, 1145324612, 1145324612, 7936, 1145324612, 1145324612, 7936, 1145324612, 1145324612, 7936, 1145324612, 1145324612, 7936, 1145324612, 1145324612, 7936, 1145324612, 1145324612, 7936, 1145324612, 1145324612, 7936, 1145324612, 1145324612, 7936, 1145324612, 1145324612, 8384, 559240, 0, 8384, 559240, 0, 8384, 559240, 0, 8384, 559240, 0, 8384, 559240, 0, 16960, 572662306, 572662306, 16960, 572662306, 572662306, 16960, 572662306, 572662306, 16960, 572662306, 572662306, 16960, 572662306, 572662306, 16960, 572662306, 572662306, 16960, 572662306, 572662306, 16960, 572662306, 572662306, 16960, 572662306, 572662306, 16960, 572662306, 572662306, 16960, 572662306, 572662306, 16960, 572662306, 572662306, 16960, 572662306, 572662306, 16960, 572662306, 572662306, 16960, 572662306, 572662306, 16960, 572662306, 572662306, 16976, 572662306, 572662306, 16976, 572662306, 572662306, 16976, 572662306, 572662306, 16976, 572662306, 572662306, 16976, 572662306, 572662306, 16976, 572662306, 572662306, 16976, 572662306, 572662306, 16976, 572662306, 572662306, 16976, 572662306, 572662306, 16976, 572662306, 572662306, 16976, 572662306, 572662306, 16976, 572662306, 572662306, 16976, 572662306, 572662306, 16976, 572662306, 572662306, 16976, 572662306, 572662306, 16976, 572662306, 572662306, 17664, 572662306, 572662306, 17664, 572662306, 572662306, 17664, 572662306, 572662306, 17664, 572662306, 572662306, 17664, 572662306, 572662306, 17664, 572662306, 572662306, 17664, 572662306, 572662306, 17664, 572662306, 572662306, 17664, 572662306, 572662306, 17664, 572662306, 572662306, 17664, 572662306, 572662306, 17664, 572662306, 572662306, 17664, 
572662306, 572662306, 17664, 572662306, 572662306, 17664, 572662306, 572662306, 17664, 572662306, 572662306, 17680, 572662306, 572662306, 17680, 572662306, 572662306, 17680, 572662306, 572662306, 17680, 572662306, 572662306, 17680, 572662306, 572662306, 17680, 572662306, 572662306, 17680, 572662306, 572662306, 17680, 572662306, 572662306, 17680, 572662306, 572662306, 17680, 572662306, 572662306, 17680, 572662306, 572662306, 17680, 572662306, 572662306, 17680, 572662306, 572662306, 17680, 572662306, 572662306, 17680, 572662306, 572662306, 17680, 572662306, 572662306, 23296, 4194304, 262144, 23296, 4194304, 262144, 28544, 0, 32768, 29888, 136348168, 2181570690, 29888, 136348168, 2181570690, 29888, 136348168, 2181570690, 29888, 136348168, 2181570690, 29888, 136348168, 2181570690, 29888, 136348168, 2181570690, 29888, 136348168, 2181570690, 29888, 136348168, 2181570690, 29888, 136348168, 2181570690, 29888, 136348168, 2181570690, 29888, 136348168, 2181570690, 30592, 65, 272695296, 30592, 65, 272695296, 30592, 65, 272695296, 30592, 65, 272695296, 30592, 65, 272695296, 32256, 65, 272629760, 32256, 65, 272629760, 32256, 65, 272629760, 32256, 65, 272629760, 32272, 65, 272629760, 32272, 65, 272629760, 32272, 65, 272629760, 32272, 65, 272629760, 32960, 65, 268435456, 32960, 65, 268435456, 32960, 65, 268435456, 32976, 65, 268435456, 32976, 65, 268435456, 32976, 65, 268435456, 33664, 0, 65536, 33680, 0, 65536, 34112, 262144, 0, 34128, 262144, 0, 34752, 81, 0, 34752, 81, 0, 34752, 81, 0, 37056, 0, 1227128832, 37056, 0, 1227128832, 37056, 0, 1227128832, 37056, 0, 1227128832, 37056, 0, 1227128832, 37056, 0, 1227128832, 37060, 0, 1227128832, 37060, 0, 1227128832, 37060, 0, 1227128832, 37060, 0, 1227128832, 37060, 0, 1227128832, 37060, 0, 1227128832, 37072, 0, 1227128832, 37072, 0, 1227128832, 37072, 0, 1227128832, 37072, 0, 1227128832, 37072, 0, 1227128832, 37072, 0, 1227128832, 37076, 0, 1227128832, 37076, 0, 1227128832, 37076, 0, 1227128832, 37076, 0, 1227128832, 37076, 0, 
1227128832, 37076, 0, 1227128832, 37696, 4, 0, 37700, 4, 0, 37712, 4, 0, 37716, 4, 0, 38272, 68174084, 1090785345, 38272, 68174084, 1090785345, 38272, 68174084, 1090785345, 38272, 68174084, 1090785345, 38272, 68174084, 1090785345, 38272, 68174084, 1090785345, 38272, 68174084, 1090785345, 38272, 68174084, 1090785345, 38272, 68174084, 1090785345, 38272, 68174084, 1090785345, 38272, 68174084, 1090785345, 38276, 68174084, 1090785345, 38276, 68174084, 1090785345, 38276, 68174084, 1090785345, 38276, 68174084, 1090785345, 38276, 68174084, 1090785345, 38276, 68174084, 1090785345, 38276, 68174084, 1090785345, 38276, 68174084, 1090785345, 38276, 68174084, 1090785345, 38276, 68174084, 1090785345, 38276, 68174084, 1090785345, 38288, 68174084, 1090785345, 38288, 68174084, 1090785345, 38288, 68174084, 1090785345, 38288, 68174084, 1090785345, 38288, 68174084, 1090785345, 38288, 68174084, 1090785345, 38288, 68174084, 1090785345, 38288, 68174084, 1090785345, 38288, 68174084, 1090785345, 38288, 68174084, 1090785345, 38288, 68174084, 1090785345, 38292, 68174084, 1090785345, 38292, 68174084, 1090785345, 38292, 68174084, 1090785345, 38292, 68174084, 1090785345, 38292, 68174084, 1090785345, 38292, 68174084, 1090785345, 38292, 68174084, 1090785345, 38292, 68174084, 1090785345, 38292, 68174084, 1090785345, 38292, 68174084, 1090785345, 38292, 68174084, 1090785345, 38976, 292, 1227128832, 38976, 292, 1227128832, 38976, 292, 1227128832, 38976, 292, 1227128832, 38976, 292, 1227128832, 38976, 292, 1227128832, 38976, 292, 1227128832, 38976, 292, 1227128832, 38976, 292, 1227128832, 38980, 292, 1227128832, 38980, 292, 1227128832, 38980, 292, 1227128832, 38980, 292, 1227128832, 38980, 292, 1227128832, 38980, 292, 1227128832, 38980, 292, 1227128832, 38980, 292, 1227128832, 38980, 292, 1227128832, 38992, 292, 1227128832, 38992, 292, 1227128832, 38992, 292, 1227128832, 38992, 292, 1227128832, 38992, 292, 1227128832, 38992, 292, 1227128832, 38992, 292, 1227128832, 38992, 292, 1227128832, 38992, 292, 
1227128832, 38996, 292, 1227128832, 38996, 292, 1227128832, 38996, 292, 1227128832, 38996, 292, 1227128832, 38996, 292, 1227128832, 38996, 292, 1227128832, 38996, 292, 1227128832, 38996, 292, 1227128832, 38996, 292, 1227128832, 39680, 0, 1226833920, 39680, 0, 1226833920, 39680, 0, 1226833920, 39680, 0, 1226833920, 39696, 0, 1226833920, 39696, 0, 1226833920, 39696, 0, 1226833920, 39696, 0, 1226833920] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756432398356020316_103_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756432398356020316_103_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..beb5ff0e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756432398356020316_103_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,250 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 50))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((((WaveGetLaneIndex() == 
11) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 2)) 
{ + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 46))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + 
Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 330 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2884, 1090785345, 272696336, 2884, 1090785345, 272696336, 2884, 1090785345, 272696336, 2884, 1090785345, 272696336, 2884, 1090785345, 272696336, 2884, 1090785345, 272696336, 2884, 1090785345, 272696336, 2884, 1090785345, 272696336, 2884, 1090785345, 272696336, 2884, 1090785345, 272696336, 2884, 1090785345, 272696336, 2888, 1090785345, 272696336, 2888, 1090785345, 272696336, 2888, 1090785345, 272696336, 2888, 1090785345, 272696336, 2888, 1090785345, 272696336, 2888, 1090785345, 272696336, 2888, 1090785345, 272696336, 2888, 1090785345, 272696336, 2888, 1090785345, 272696336, 2888, 1090785345, 272696336, 2888, 1090785345, 272696336, 2900, 1090785345, 272696336, 2900, 1090785345, 272696336, 2900, 1090785345, 272696336, 2900, 1090785345, 272696336, 2900, 1090785345, 272696336, 2900, 1090785345, 272696336, 2900, 1090785345, 272696336, 2900, 1090785345, 272696336, 2900, 1090785345, 272696336, 2900, 1090785345, 272696336, 2900, 1090785345, 272696336, 2904, 1090785345, 272696336, 2904, 1090785345, 272696336, 2904, 1090785345, 272696336, 2904, 1090785345, 272696336, 2904, 1090785345, 272696336, 2904, 1090785345, 272696336, 2904, 1090785345, 272696336, 2904, 1090785345, 272696336, 2904, 1090785345, 272696336, 2904, 1090785345, 272696336, 2904, 1090785345, 272696336, 3648, 272696336, 68174084, 3648, 272696336, 68174084, 3648, 272696336, 68174084, 3648, 272696336, 68174084, 3648, 272696336, 68174084, 3648, 272696336, 68174084, 3648, 272696336, 68174084, 3648, 272696336, 68174084, 3648, 272696336, 68174084, 3648, 272696336, 68174084, 4800, 36, 1226833920, 4800, 36, 1226833920, 4800, 36, 1226833920, 4800, 36, 1226833920, 4800, 36, 1226833920, 4800, 36, 1226833920, 6784, 0, 2097152, 6800, 0, 2097152, 6816, 0, 2097152, 10240, 8388608, 8, 10240, 8388608, 8, 
11136, 85, 0, 11136, 85, 0, 11136, 85, 0, 11136, 85, 0, 12496, 134217728, 8192, 12496, 134217728, 8192, 12512, 134217728, 8192, 12512, 134217728, 8192, 13632, 2, 545259520, 13632, 2, 545259520, 13632, 2, 545259520, 14336, 2, 545390592, 14336, 2, 545390592, 14336, 2, 545390592, 14336, 2, 545390592, 15424, 545392672, 136348168, 15424, 545392672, 136348168, 15424, 545392672, 136348168, 15424, 545392672, 136348168, 15424, 545392672, 136348168, 15424, 545392672, 136348168, 15424, 545392672, 136348168, 15424, 545392672, 136348168, 15424, 545392672, 136348168, 15424, 545392672, 136348168, 15440, 545392672, 136348168, 15440, 545392672, 136348168, 15440, 545392672, 136348168, 15440, 545392672, 136348168, 15440, 545392672, 136348168, 15440, 545392672, 136348168, 15440, 545392672, 136348168, 15440, 545392672, 136348168, 15440, 545392672, 136348168, 15440, 545392672, 136348168, 15456, 545392672, 136348168, 15456, 545392672, 136348168, 15456, 545392672, 136348168, 15456, 545392672, 136348168, 15456, 545392672, 136348168, 15456, 545392672, 136348168, 15456, 545392672, 136348168, 15456, 545392672, 136348168, 15456, 545392672, 136348168, 15456, 545392672, 136348168] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756432430360347142_104_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756432430360347142_104_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d548ef0d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756432430360347142_104_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,208 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: consecutive uint triples (tag, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free slot in _participant_bit; each record advances it by 3 */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* After each wave op, every executing lane reserves 3 slots with InterlockedAdd and stores a tag (op constant << 6, OR'd with shifted loop counters when inside a loop) followed by WaveActiveBallot(1).x and .y. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 35))) { + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 26)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 50))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 46))) { + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp
+ 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((212 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 1)) { + break; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; +
break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 135 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 1048576, 4097, 1856, 1048576, 4097, 1856, 1048576, 4097, 1872, 1048576, 4097, 1872, 1048576, 4097, 1872, 1048576, 4097, 1888, 1048576, 4097, 1888, 1048576, 4097, 1888, 1048576, 4097, 3968, 17, 0, 3968, 17, 0, 3984, 17, 0, 3984, 17, 0, 4000, 17, 0, 4000, 17, 0, 4416, 17895697, 0, 4416, 17895697, 0, 4416, 17895697, 0, 4416, 17895697, 0, 4416, 17895697, 0, 4416, 17895697, 0, 4416, 17895697, 0, 4432, 17895697, 0, 4432, 17895697, 0, 4432, 17895697, 0, 4432, 17895697, 0, 4432, 17895697, 0, 4432, 17895697, 0, 4432, 17895697, 0, 4448, 17895697, 0, 4448, 17895697, 0, 4448, 17895697, 0, 4448, 17895697, 0, 4448, 17895697, 0, 4448, 17895697, 0, 4448, 17895697, 0, 13568, 1073758208, 0, 13568, 1073758208, 0, 13584, 1073758208, 0, 13584, 1073758208, 0, 14400, 559240, 0, 14400, 559240, 0, 14400, 559240, 0, 14400, 559240, 0, 14400, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756432446081647627_106_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756432446081647627_106_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1fb4308c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756432446081647627_106_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,359 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: consecutive uint triples (tag, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free slot in _participant_bit; each record advances it by 3 */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* After each wave op, every executing lane reserves 3 slots with InterlockedAdd and stores a tag (op constant << 6, OR'd with shifted loop counters when inside a loop) followed by WaveActiveBallot(1).x and .y. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 55))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; +
_participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 28))) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 =
(counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x;
+ _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 5))) { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 40))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 28)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((266 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); +
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (286 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (295 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (321 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 15)) { + if ((WaveGetLaneIndex() >= 59)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (331 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex()
== 15) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (350 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 465 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1920, 8, 2181038080, 1920, 8, 2181038080, 1920, 8, 2181038080, 3776, 8, 2181038080, 3776, 8, 2181038080, 3776, 8, 2181038080, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10832, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848,
1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10848, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 10864, 1431655765, 1431655765, 12608, 32768, 0, 12624, 32768, 0, 16128, 8192, 0, 16144, 8192, 0, 16576, 178948746, 0, 16576, 178948746, 0, 16576, 
178948746, 0, 16576, 178948746, 0, 16576, 178948746, 0, 16576, 178948746, 0, 16576, 178948746, 0, 16576, 178948746, 0, 16576, 178948746, 0, 16576, 178948746, 0, 16576, 178948746, 0, 16576, 178948746, 0, 16592, 178948746, 0, 16592, 178948746, 0, 16592, 178948746, 0, 16592, 178948746, 0, 16592, 178948746, 0, 16592, 178948746, 0, 16592, 178948746, 0, 16592, 178948746, 0, 16592, 178948746, 0, 16592, 178948746, 0, 16592, 178948746, 0, 16592, 178948746, 0, 17024, 0, 715816960, 17024, 0, 715816960, 17024, 0, 715816960, 17024, 0, 715816960, 17024, 0, 715816960, 17024, 0, 715816960, 17024, 0, 715816960, 17024, 0, 715816960, 17040, 0, 715816960, 17040, 0, 715816960, 17040, 0, 715816960, 17040, 0, 715816960, 17040, 0, 715816960, 17040, 0, 715816960, 17040, 0, 715816960, 17040, 0, 715816960, 17664, 85, 0, 17664, 85, 0, 17664, 85, 0, 17664, 85, 0, 18304, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756432488041482361_107_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756432488041482361_107_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..95f85154 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756432488041482361_107_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,214 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* record log: consecutive uint triples (tag, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the next free slot in _participant_bit; each record advances it by 3 */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* After each wave op, every executing lane reserves 3 slots with InterlockedAdd and stores a tag (op constant << 6, OR'd with shifted loop counters when inside a loop) followed by WaveActiveBallot(1).x and .y. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 54))) { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 44))) { + result = (result +
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 43))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result +
WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((162 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((171 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 2)) { + break; + } + } + } + } else { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); +
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 195 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6144, 272696336, 68174084, 6144, 272696336, 68174084, 6144, 272696336, 68174084, 6144, 272696336, 68174084, 6144, 272696336, 68174084, 6144, 272696336, 68174084, 6144, 272696336, 68174084, 6144, 272696336, 68174084, 6144, 272696336, 68174084, 6144, 272696336, 68174084, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 10384, 33554432, 2048, 10384, 33554432, 2048, 10388, 33554432, 2048, 10388, 33554432, 2048, 10392, 33554432, 2048, 10392, 33554432, 2048, 10400, 33554432, 2048, 10400, 33554432, 2048, 10404, 33554432, 2048, 10404, 33554432, 2048, 10408, 33554432, 2048, 10408, 33554432, 2048, 12288, 1073741952, 1048576, 12288,
1073741952, 1048576, 12288, 1073741952, 1048576, 14224, 2048, 147460, 14224, 2048, 147460, 14224, 2048, 147460, 14224, 2048, 147460, 14240, 2048, 147460, 14240, 2048, 147460, 14240, 2048, 147460, 14240, 2048, 147460, 15440, 134742144, 16384, 15440, 134742144, 16384, 15440, 134742144, 16384, 15440, 134742144, 16384, 15456, 134742144, 16384, 15456, 134742144, 16384, 15456, 134742144, 16384, 15456, 134742144, 16384, 16576, 268443648, 67108864, 16576, 268443648, 67108864, 16576, 268443648, 67108864] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756432551642003810_109_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756432551642003810_109_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cf94b0e6 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756432551642003810_109_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,276 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 20)) { + if ((WaveGetLaneIndex() == 60)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((67 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((82 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = 
(result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((155 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((162 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + 
case 1: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 6))) { + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 51))) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((288 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((299 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 435 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2256, 2097152, 66048, 2256, 2097152, 66048, 2256, 2097152, 66048, 2272, 2097152, 66048, 2272, 2097152, 66048, 2272, 2097152, 66048, 3856, 13107, 0, 3856, 13107, 0, 3856, 13107, 0, 3856, 13107, 0, 3856, 13107, 0, 3856, 13107, 0, 3856, 13107, 0, 3856, 13107, 0, 3860, 13107, 0, 3860, 13107, 0, 3860, 13107, 0, 3860, 13107, 0, 3860, 13107, 0, 3860, 13107, 0, 3860, 13107, 0, 3860, 13107, 0, 3864, 13107, 0, 3864, 13107, 0, 3864, 13107, 0, 3864, 13107, 0, 3864, 13107, 0, 3864, 13107, 0, 3864, 13107, 0, 3864, 13107, 0, 3872, 13107, 0, 3872, 13107, 0, 3872, 13107, 0, 3872, 13107, 0, 3872, 13107, 0, 3872, 13107, 0, 3872, 13107, 0, 3872, 13107, 0, 3876, 13107, 0, 3876, 13107, 0, 3876, 13107, 0, 3876, 13107, 0, 3876, 13107, 0, 3876, 13107, 0, 3876, 13107, 0, 3876, 13107, 0, 3880, 13107, 0, 3880, 13107, 0, 3880, 13107, 0, 3880, 13107, 0, 3880, 13107, 0, 3880, 13107, 0, 3880, 13107, 0, 3880, 13107, 0, 4304, 3, 0, 4304, 3, 0, 4308, 3, 0, 4308, 3, 0, 4312, 3, 0, 4312, 3, 0, 4320, 3, 0, 4320, 3, 0, 4324, 3, 0, 4324, 3, 0, 4328, 3, 0, 4328, 3, 0, 5264, 16777216, 0, 5268, 16777216, 0, 5272, 16777216, 0, 5280, 16777216, 0, 5284, 16777216, 0, 5288, 16777216, 0, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 
6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 7104, 838860, 0, 7104, 838860, 0, 7104, 838860, 0, 7104, 838860, 0, 7104, 838860, 0, 7104, 838860, 0, 7104, 838860, 0, 7104, 838860, 0, 7104, 838860, 0, 7104, 838860, 0, 9024, 0, 16, 9040, 0, 16, 9056, 0, 16, 9924, 268435456, 0, 9928, 268435456, 0, 9932, 268435456, 0, 9940, 268435456, 0, 9944, 268435456, 0, 9948, 268435456, 0, 9956, 268435456, 0, 9960, 268435456, 0, 9964, 268435456, 0, 11328, 0, 16777216, 11344, 0, 16777216, 11360, 0, 16777216, 16704, 1145324612, 1145324612, 16704, 1145324612, 1145324612, 16704, 1145324612, 1145324612, 16704, 1145324612, 1145324612, 16704, 1145324612, 1145324612, 16704, 1145324612, 1145324612, 16704, 1145324612, 1145324612, 16704, 1145324612, 1145324612, 16704, 1145324612, 1145324612, 16704, 1145324612, 1145324612, 16704, 1145324612, 1145324612, 16704, 1145324612, 1145324612, 16704, 1145324612, 1145324612, 16704, 1145324612, 1145324612, 16704, 1145324612, 1145324612, 16704, 1145324612, 1145324612, 18448, 2184, 2290089984, 18448, 2184, 2290089984, 18448, 2184, 2290089984, 18448, 2184, 2290089984, 18448, 2184, 2290089984, 18448, 2184, 2290089984, 18464, 2184, 2290089984, 18464, 2184, 2290089984, 18464, 2184, 2290089984, 18464, 2184, 2290089984, 18464, 2184, 2290089984, 18464, 2184, 2290089984, 19152, 2048, 0, 19168, 2048, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756432748026438049_110_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756432748026438049_110_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a23a254c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756432748026438049_110_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,197 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (((51 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 60))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 34)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if 
((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 396 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 136348168, 2181570690, 2112, 136348168, 2181570690, 2112, 136348168, 2181570690, 2112, 136348168, 2181570690, 2112, 136348168, 2181570690, 2112, 136348168, 2181570690, 2112, 136348168, 2181570690, 2112, 136348168, 2181570690, 2112, 136348168, 2181570690, 2112, 136348168, 2181570690, 2112, 136348168, 2181570690, 2128, 136348168, 2181570690, 2128, 136348168, 2181570690, 
2128, 136348168, 2181570690, 2128, 136348168, 2181570690, 2128, 136348168, 2181570690, 2128, 136348168, 2181570690, 2128, 136348168, 2181570690, 2128, 136348168, 2181570690, 2128, 136348168, 2181570690, 2128, 136348168, 2181570690, 2128, 136348168, 2181570690, 2144, 136348168, 2181570690, 2144, 136348168, 2181570690, 2144, 136348168, 2181570690, 2144, 136348168, 2181570690, 2144, 136348168, 2181570690, 2144, 136348168, 2181570690, 2144, 136348168, 2181570690, 2144, 136348168, 2181570690, 2144, 136348168, 2181570690, 2144, 136348168, 2181570690, 2144, 136348168, 2181570690, 3268, 8, 2181562368, 3268, 8, 2181562368, 3268, 8, 2181562368, 3268, 8, 2181562368, 3272, 8, 2181562368, 3272, 8, 2181562368, 3272, 8, 2181562368, 3272, 8, 2181562368, 3284, 8, 2181562368, 3284, 8, 2181562368, 3284, 8, 2181562368, 3284, 8, 2181562368, 3288, 8, 2181562368, 3288, 8, 2181562368, 3288, 8, 2181562368, 3288, 8, 2181562368, 3300, 8, 2181562368, 3300, 8, 2181562368, 3300, 8, 2181562368, 3300, 8, 2181562368, 3304, 8, 2181562368, 3304, 8, 2181562368, 3304, 8, 2181562368, 3304, 8, 2181562368, 7120, 2181570690, 545392672, 7120, 2181570690, 545392672, 7120, 2181570690, 545392672, 7120, 2181570690, 545392672, 7120, 2181570690, 545392672, 7120, 2181570690, 545392672, 7120, 2181570690, 545392672, 7120, 2181570690, 545392672, 7120, 2181570690, 545392672, 7120, 2181570690, 545392672, 7120, 2181570690, 545392672, 7136, 2181570690, 545392672, 7136, 2181570690, 545392672, 7136, 2181570690, 545392672, 7136, 2181570690, 545392672, 7136, 2181570690, 545392672, 7136, 2181570690, 545392672, 7136, 2181570690, 545392672, 7136, 2181570690, 545392672, 7136, 2181570690, 545392672, 7136, 2181570690, 545392672, 7136, 2181570690, 545392672, 7152, 2181570690, 545392672, 7152, 2181570690, 545392672, 7152, 2181570690, 545392672, 7152, 2181570690, 545392672, 7152, 2181570690, 545392672, 7152, 2181570690, 545392672, 7152, 2181570690, 545392672, 7152, 2181570690, 545392672, 7152, 2181570690, 545392672, 7152, 
2181570690, 545392672, 7152, 2181570690, 545392672, 9088, 545392672, 136348168, 9088, 545392672, 136348168, 9088, 545392672, 136348168, 9088, 545392672, 136348168, 9088, 545392672, 136348168, 9088, 545392672, 136348168, 9088, 545392672, 136348168, 9088, 545392672, 136348168, 9088, 545392672, 136348168, 9088, 545392672, 136348168, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530, 9664, 2863311530, 2863311530] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756432760182310359_112_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756432760182310359_112_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..15263193 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756432760182310359_112_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,81 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 15)) { + if ((WaveGetLaneIndex() < 32)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 213 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 32767, 0, 576, 32767, 0, 576, 32767, 0, 576, 32767, 0, 576, 32767, 0, 576, 32767, 0, 576, 32767, 0, 576, 32767, 0, 576, 32767, 
0, 576, 32767, 0, 576, 32767, 0, 576, 32767, 0, 576, 32767, 0, 576, 32767, 0, 576, 32767, 0, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1872, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144, 1888, 131071, 4292870144] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756432761070381474_113_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756432761070381474_113_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..72e6b429 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756432761070381474_113_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,248 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 48))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() 
== 50))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 48)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 2)) { + break; + } + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 60))) { + if ((((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 47)) { + if ((WaveGetLaneIndex() >= 54)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 19)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 25)) { + if ((WaveGetLaneIndex() == 60)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7056, 0, 65536, 7072, 0, 65536, 12672, 128, 0, 12688, 128, 0, 12704, 128, 0, 14400, 613566756, 1227133513, 14400, 613566756, 1227133513, 14400, 613566756, 1227133513, 14400, 613566756, 1227133513, 14400, 613566756, 1227133513, 14400, 613566756, 1227133513, 14400, 613566756, 1227133513, 14400, 613566756, 1227133513, 
14400, 613566756, 1227133513, 14400, 613566756, 1227133513, 14400, 613566756, 1227133513, 14400, 613566756, 1227133513, 14400, 613566756, 1227133513, 14400, 613566756, 1227133513, 14400, 613566756, 1227133513, 14400, 613566756, 1227133513, 14400, 613566756, 1227133513, 14400, 613566756, 1227133513, 14400, 613566756, 1227133513, 14400, 613566756, 1227133513, 14400, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756432764740920534_114_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756432764740920534_114_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e555e06f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756432764740920534_114_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,411 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 45))) { + if ((((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 28)) { + if ((WaveGetLaneIndex() == 46)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 17)) { + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 55))) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 58)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + 
counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((243 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((264 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 55))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (307 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (339 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (344 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (354 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() >= 57)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((369 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(376 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 52)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (385 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 31)) { + if ((WaveGetLaneIndex() >= 52)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 43)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (402 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 450 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 9216, 87381, 0, 9216, 87381, 0, 9216, 87381, 0, 9216, 87381, 0, 9216, 87381, 0, 9216, 87381, 0, 9216, 87381, 0, 9216, 87381, 0, 9216, 87381, 0, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 
10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 10240, 1163220309, 1431655765, 16912, 69905, 286261248, 16912, 69905, 286261248, 16912, 69905, 286261248, 16912, 69905, 286261248, 16912, 69905, 286261248, 16912, 69905, 286261248, 16912, 69905, 286261248, 16912, 69905, 286261248, 16916, 69905, 286261248, 16916, 69905, 286261248, 16916, 69905, 286261248, 16916, 69905, 286261248, 16916, 69905, 286261248, 16916, 69905, 286261248, 16916, 69905, 286261248, 16916, 69905, 286261248, 16920, 69905, 286261248, 16920, 69905, 286261248, 16920, 69905, 286261248, 16920, 69905, 286261248, 16920, 69905, 286261248, 16920, 69905, 286261248, 16920, 69905, 286261248, 16920, 69905, 286261248, 16928, 69905, 286261248, 16928, 69905, 286261248, 16928, 69905, 286261248, 16928, 69905, 286261248, 16928, 69905, 286261248, 16928, 69905, 286261248, 16928, 69905, 286261248, 16928, 69905, 286261248, 16932, 69905, 286261248, 16932, 69905, 286261248, 16932, 69905, 286261248, 16932, 69905, 286261248, 16932, 69905, 286261248, 16932, 69905, 286261248, 16932, 69905, 286261248, 16932, 69905, 286261248, 16936, 69905, 286261248, 16936, 69905, 286261248, 16936, 69905, 286261248, 16936, 69905, 286261248, 16936, 69905, 286261248, 16936, 
69905, 286261248, 16936, 69905, 286261248, 16936, 69905, 286261248, 16944, 69905, 286261248, 16944, 69905, 286261248, 16944, 69905, 286261248, 16944, 69905, 286261248, 16944, 69905, 286261248, 16944, 69905, 286261248, 16944, 69905, 286261248, 16944, 69905, 286261248, 16948, 69905, 286261248, 16948, 69905, 286261248, 16948, 69905, 286261248, 16948, 69905, 286261248, 16948, 69905, 286261248, 16948, 69905, 286261248, 16948, 69905, 286261248, 16948, 69905, 286261248, 16952, 69905, 286261248, 16952, 69905, 286261248, 16952, 69905, 286261248, 16952, 69905, 286261248, 16952, 69905, 286261248, 16952, 69905, 286261248, 16952, 69905, 286261248, 16952, 69905, 286261248, 22016, 1145324612, 1145324612, 22016, 1145324612, 1145324612, 22016, 1145324612, 1145324612, 22016, 1145324612, 1145324612, 22016, 1145324612, 1145324612, 22016, 1145324612, 1145324612, 22016, 1145324612, 1145324612, 22016, 1145324612, 1145324612, 22016, 1145324612, 1145324612, 22016, 1145324612, 1145324612, 22016, 1145324612, 1145324612, 22016, 1145324612, 1145324612, 22016, 1145324612, 1145324612, 22016, 1145324612, 1145324612, 22016, 1145324612, 1145324612, 22016, 1145324612, 1145324612, 24640, 0, 2290089984, 24640, 0, 2290089984, 24640, 0, 2290089984, 25728, 0, 2290649088, 25728, 0, 2290649088, 25728, 0, 2290649088, 25728, 0, 2290649088, 25728, 0, 2290649088, 25728, 0, 2290649088] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756432800952120743_115_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756432800952120743_115_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..286ae4d4 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756432800952120743_115_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,327 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 46)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 47)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
if ((i1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((93 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() < 22)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((122 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((133 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((142 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 42))) { + if (((WaveGetLaneIndex() 
< 20) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 50))) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 52)) { + if ((WaveGetLaneIndex() == 49)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((262 << 6) | (counter5 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((290 << 6) | (counter6 << 4)) | (i7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() >= 62)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (304 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 900 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1472, 37449, 2454265856, 1472, 37449, 2454265856, 1472, 37449, 2454265856, 1472, 37449, 2454265856, 1472, 37449, 2454265856, 1472, 37449, 2454265856, 1472, 37449, 2454265856, 1472, 37449, 2454265856, 1472, 37449, 2454265856, 1472, 37449, 2454265856, 1472, 37449, 2454265856, 1472, 37449, 2454265856, 1472, 37449, 2454265856, 1488, 37449, 2454265856, 1488, 37449, 
2454265856, 1488, 37449, 2454265856, 1488, 37449, 2454265856, 1488, 37449, 2454265856, 1488, 37449, 2454265856, 1488, 37449, 2454265856, 1488, 37449, 2454265856, 1488, 37449, 2454265856, 1488, 37449, 2454265856, 1488, 37449, 2454265856, 1488, 37449, 2454265856, 1488, 37449, 2454265856, 1504, 37449, 2454265856, 1504, 37449, 2454265856, 1504, 37449, 2454265856, 1504, 37449, 2454265856, 1504, 37449, 2454265856, 1504, 37449, 2454265856, 1504, 37449, 2454265856, 1504, 37449, 2454265856, 1504, 37449, 2454265856, 1504, 37449, 2454265856, 1504, 37449, 2454265856, 1504, 37449, 2454265856, 1504, 37449, 2454265856, 4416, 585, 2454192128, 4416, 585, 2454192128, 4416, 585, 2454192128, 4416, 585, 2454192128, 4416, 585, 2454192128, 4416, 585, 2454192128, 4416, 585, 2454192128, 4416, 585, 2454192128, 4416, 585, 2454192128, 4432, 585, 2454192128, 4432, 585, 2454192128, 4432, 585, 2454192128, 4432, 585, 2454192128, 4432, 585, 2454192128, 4432, 585, 2454192128, 4432, 585, 2454192128, 4432, 585, 2454192128, 4432, 585, 2454192128, 4448, 585, 2454192128, 4448, 585, 2454192128, 4448, 585, 2454192128, 4448, 585, 2454192128, 4448, 585, 2454192128, 4448, 585, 2454192128, 4448, 585, 2454192128, 4448, 585, 2454192128, 4448, 585, 2454192128, 5956, 272696336, 68174084, 5956, 272696336, 68174084, 5956, 272696336, 68174084, 5956, 272696336, 68174084, 5956, 272696336, 68174084, 5956, 272696336, 68174084, 5956, 272696336, 68174084, 5956, 272696336, 68174084, 5956, 272696336, 68174084, 5956, 272696336, 68174084, 5960, 272696336, 68174084, 5960, 272696336, 68174084, 5960, 272696336, 68174084, 5960, 272696336, 68174084, 5960, 272696336, 68174084, 5960, 272696336, 68174084, 5960, 272696336, 68174084, 5960, 272696336, 68174084, 5960, 272696336, 68174084, 5960, 272696336, 68174084, 5964, 272696336, 68174084, 5964, 272696336, 68174084, 5964, 272696336, 68174084, 5964, 272696336, 68174084, 5964, 272696336, 68174084, 5964, 272696336, 68174084, 5964, 272696336, 68174084, 5964, 272696336, 68174084, 5964, 
272696336, 68174084, 5964, 272696336, 68174084, 5972, 272696336, 68174084, 5972, 272696336, 68174084, 5972, 272696336, 68174084, 5972, 272696336, 68174084, 5972, 272696336, 68174084, 5972, 272696336, 68174084, 5972, 272696336, 68174084, 5972, 272696336, 68174084, 5972, 272696336, 68174084, 5972, 272696336, 68174084, 5976, 272696336, 68174084, 5976, 272696336, 68174084, 5976, 272696336, 68174084, 5976, 272696336, 68174084, 5976, 272696336, 68174084, 5976, 272696336, 68174084, 5976, 272696336, 68174084, 5976, 272696336, 68174084, 5976, 272696336, 68174084, 5976, 272696336, 68174084, 5980, 272696336, 68174084, 5980, 272696336, 68174084, 5980, 272696336, 68174084, 5980, 272696336, 68174084, 5980, 272696336, 68174084, 5980, 272696336, 68174084, 5980, 272696336, 68174084, 5980, 272696336, 68174084, 5980, 272696336, 68174084, 5980, 272696336, 68174084, 6596, 146, 0, 6596, 146, 0, 6596, 146, 0, 6600, 146, 0, 6600, 146, 0, 6600, 146, 0, 6604, 146, 0, 6604, 146, 0, 6604, 146, 0, 6612, 146, 0, 6612, 146, 0, 6612, 146, 0, 6616, 146, 0, 6616, 146, 0, 6616, 146, 0, 6620, 146, 0, 6620, 146, 0, 6620, 146, 0, 7812, 268435456, 32, 7812, 268435456, 32, 7816, 268435456, 32, 7816, 268435456, 32, 7820, 268435456, 32, 7820, 268435456, 32, 7828, 268435456, 32, 7828, 268435456, 32, 7832, 268435456, 32, 7832, 268435456, 32, 7836, 268435456, 32, 7836, 268435456, 32, 8516, 268435456, 8388608, 8516, 268435456, 8388608, 8520, 268435456, 8388608, 8520, 268435456, 8388608, 8524, 268435456, 8388608, 8524, 268435456, 8388608, 8532, 268435456, 8388608, 8532, 268435456, 8388608, 8536, 268435456, 8388608, 8536, 268435456, 8388608, 8540, 268435456, 8388608, 8540, 268435456, 8388608, 9092, 2181570690, 545392672, 9092, 2181570690, 545392672, 9092, 2181570690, 545392672, 9092, 2181570690, 545392672, 9092, 2181570690, 545392672, 9092, 2181570690, 545392672, 9092, 2181570690, 545392672, 9092, 2181570690, 545392672, 9092, 2181570690, 545392672, 9092, 2181570690, 545392672, 9092, 2181570690, 545392672, 9096, 
2181570690, 545392672, 9096, 2181570690, 545392672, 9096, 2181570690, 545392672, 9096, 2181570690, 545392672, 9096, 2181570690, 545392672, 9096, 2181570690, 545392672, 9096, 2181570690, 545392672, 9096, 2181570690, 545392672, 9096, 2181570690, 545392672, 9096, 2181570690, 545392672, 9096, 2181570690, 545392672, 9100, 2181570690, 545392672, 9100, 2181570690, 545392672, 9100, 2181570690, 545392672, 9100, 2181570690, 545392672, 9100, 2181570690, 545392672, 9100, 2181570690, 545392672, 9100, 2181570690, 545392672, 9100, 2181570690, 545392672, 9100, 2181570690, 545392672, 9100, 2181570690, 545392672, 9100, 2181570690, 545392672, 9108, 2181570690, 545392672, 9108, 2181570690, 545392672, 9108, 2181570690, 545392672, 9108, 2181570690, 545392672, 9108, 2181570690, 545392672, 9108, 2181570690, 545392672, 9108, 2181570690, 545392672, 9108, 2181570690, 545392672, 9108, 2181570690, 545392672, 9108, 2181570690, 545392672, 9108, 2181570690, 545392672, 9112, 2181570690, 545392672, 9112, 2181570690, 545392672, 9112, 2181570690, 545392672, 9112, 2181570690, 545392672, 9112, 2181570690, 545392672, 9112, 2181570690, 545392672, 9112, 2181570690, 545392672, 9112, 2181570690, 545392672, 9112, 2181570690, 545392672, 9112, 2181570690, 545392672, 9112, 2181570690, 545392672, 9116, 2181570690, 545392672, 9116, 2181570690, 545392672, 9116, 2181570690, 545392672, 9116, 2181570690, 545392672, 9116, 2181570690, 545392672, 9116, 2181570690, 545392672, 9116, 2181570690, 545392672, 9116, 2181570690, 545392672, 9116, 2181570690, 545392672, 9116, 2181570690, 545392672, 9116, 2181570690, 545392672, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 
9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 10432, 1, 0, 11008, 268501008, 1048832, 11008, 268501008, 1048832, 11008, 268501008, 1048832, 11008, 268501008, 1048832, 11008, 268501008, 1048832, 13440, 8738, 572522496, 13440, 8738, 572522496, 13440, 8738, 572522496, 13440, 8738, 572522496, 13440, 8738, 572522496, 13440, 8738, 572522496, 13440, 8738, 572522496, 14736, 2, 572522496, 14736, 2, 572522496, 14736, 2, 572522496, 14736, 2, 572522496, 14752, 2, 572522496, 14752, 2, 572522496, 14752, 2, 572522496, 14752, 2, 572522496, 14768, 2, 572522496, 14768, 2, 572522496, 14768, 2, 572522496, 14768, 2, 572522496, 18576, 136, 2147483648, 18576, 136, 2147483648, 18576, 136, 2147483648, 18580, 136, 2147483648, 18580, 136, 2147483648, 18580, 136, 2147483648, 18584, 136, 2147483648, 18584, 136, 2147483648, 18584, 136, 2147483648, 18592, 136, 2147483648, 18592, 136, 2147483648, 18592, 136, 2147483648, 18596, 136, 2147483648, 18596, 136, 2147483648, 18596, 136, 2147483648, 18600, 136, 2147483648, 18600, 136, 2147483648, 18600, 136, 2147483648, 19216, 0, 2147483648, 19232, 0, 2147483648] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756432992065358738_117_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756432992065358738_117_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7928669e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756432992065358738_117_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,136 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 25)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 58)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 65, 0, 1792, 65, 0, 2368, 4260880, 0, 2368, 4260880, 0, 2368, 4260880, 0, 2368, 4260880, 0, 2688, 1065220, 0, 2688, 1065220, 0, 2688, 1065220, 0, 2688, 1065220, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756432992380264307_118_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756432992380264307_118_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..31b742f3 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756432992380264307_118_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,468 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 42))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 51)) { + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 22))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 61))) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((220 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((229 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 6))) { + if (((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 1: { + if ((WaveGetLaneIndex() == 63)) { + if ((WaveGetLaneIndex() == 61)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (290 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 42))) { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (308 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (313 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((331 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter5 = 0; + while ((counter5 < 2)) 
{ + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((347 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (361 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (370 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (385 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 59))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((429 
<< 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((440 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((451 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter6 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (463 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (468 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 207 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [960, 1, 0, 1536, 268501008, 1048832, 1536, 268501008, 1048832, 1536, 268501008, 1048832, 1536, 268501008, 1048832, 1536, 
268501008, 1048832, 3392, 256, 0, 3408, 256, 0, 4608, 1, 0, 11968, 1145324612, 1145324612, 11968, 1145324612, 1145324612, 11968, 1145324612, 1145324612, 11968, 1145324612, 1145324612, 11968, 1145324612, 1145324612, 11968, 1145324612, 1145324612, 11968, 1145324612, 1145324612, 11968, 1145324612, 1145324612, 11968, 1145324612, 1145324612, 11968, 1145324612, 1145324612, 11968, 1145324612, 1145324612, 11968, 1145324612, 1145324612, 11968, 1145324612, 1145324612, 11968, 1145324612, 1145324612, 11968, 1145324612, 1145324612, 11968, 1145324612, 1145324612, 20032, 68, 0, 20032, 68, 0, 24640, 64, 0, 28880, 1, 2453667840, 28880, 1, 2453667840, 28880, 1, 2453667840, 28880, 1, 2453667840, 28880, 1, 2453667840, 28896, 1, 2453667840, 28896, 1, 2453667840, 28896, 1, 2453667840, 28896, 1, 2453667840, 28896, 1, 2453667840, 29632, 272696336, 68174084, 29632, 272696336, 68174084, 29632, 272696336, 68174084, 29632, 272696336, 68174084, 29632, 272696336, 68174084, 29632, 272696336, 68174084, 29632, 272696336, 68174084, 29632, 272696336, 68174084, 29632, 272696336, 68174084, 29632, 272696336, 68174084, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513, 29952, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: 
_participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756433024143945668_119_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756433024143945668_119_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1f72a82e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756433024143945668_119_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,253 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 58)) { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 35)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + 
while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 41))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 49)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 51)) { + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 59)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 54)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((231 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - 
Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4672, 272696336, 68174084, 4672, 272696336, 68174084, 4672, 272696336, 68174084, 4672, 272696336, 68174084, 4672, 272696336, 68174084, 4672, 272696336, 68174084, 4672, 272696336, 68174084, 4672, 272696336, 68174084, 4672, 272696336, 68174084, 4672, 272696336, 68174084, 6464, 68174084, 1090785345, 6464, 68174084, 1090785345, 6464, 68174084, 1090785345, 6464, 68174084, 1090785345, 6464, 68174084, 1090785345, 6464, 68174084, 1090785345, 6464, 68174084, 1090785345, 6464, 68174084, 1090785345, 6464, 68174084, 1090785345, 6464, 68174084, 1090785345, 6464, 68174084, 1090785345, 7760, 260, 1090785280, 7760, 260, 1090785280, 7760, 260, 1090785280, 7760, 260, 1090785280, 7760, 260, 1090785280, 7760, 260, 1090785280, 7776, 260, 1090785280, 7776, 260, 1090785280, 7776, 260, 1090785280, 7776, 260, 1090785280, 7776, 260, 1090785280, 7776, 260, 1090785280, 7792, 260, 1090785280, 7792, 260, 1090785280, 7792, 260, 1090785280, 7792, 260, 1090785280, 7792, 260, 1090785280, 7792, 260, 1090785280, 8464, 4, 1073741824, 8464, 4, 1073741824, 8480, 4, 1073741824, 8480, 4, 1073741824, 8496, 4, 1073741824, 8496, 4, 1073741824, 9600, 68174084, 1090785345, 9600, 68174084, 1090785345, 9600, 68174084, 1090785345, 9600, 68174084, 1090785345, 9600, 68174084, 1090785345, 9600, 68174084, 1090785345, 9600, 68174084, 1090785345, 9600, 68174084, 1090785345, 9600, 68174084, 1090785345, 9600, 68174084, 1090785345, 9600, 68174084, 1090785345, 10816, 0, 512, 11776, 0, 136314880, 11776, 0, 136314880, 11792, 0, 136314880, 11792, 0, 136314880, 11808, 0, 136314880, 11808, 0, 136314880, 14784, 0, 134217728, 14800, 0, 134217728, 14816, 0, 134217728] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756433025855711108_120_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756433025855711108_120_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6143c645 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756433025855711108_120_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,85 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 58))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 
1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1728, 31, 4227858432, 1728, 31, 4227858432, 1728, 31, 4227858432, 1728, 31, 4227858432, 1728, 31, 4227858432, 1728, 31, 4227858432, 1728, 31, 4227858432, 1728, 31, 4227858432, 1728, 31, 4227858432, 1728, 31, 4227858432, 1728, 31, 4227858432, 1744, 31, 4227858432, 1744, 31, 4227858432, 1744, 31, 4227858432, 1744, 31, 4227858432, 1744, 31, 4227858432, 1744, 31, 4227858432, 1744, 31, 4227858432, 1744, 31, 4227858432, 1744, 31, 4227858432, 1744, 31, 4227858432, 1744, 31, 4227858432, 2432, 127, 4227858432, 2432, 127, 4227858432, 2432, 127, 4227858432, 2432, 127, 4227858432, 2432, 127, 4227858432, 2432, 127, 4227858432, 2432, 127, 4227858432, 2432, 127, 4227858432, 2432, 127, 4227858432, 2432, 127, 4227858432, 2432, 127, 4227858432, 2432, 127, 4227858432, 2432, 127, 4227858432, 2448, 127, 4227858432, 2448, 127, 4227858432, 2448, 127, 4227858432, 2448, 127, 4227858432, 2448, 127, 4227858432, 2448, 127, 4227858432, 2448, 127, 4227858432, 2448, 127, 4227858432, 2448, 127, 4227858432, 2448, 127, 4227858432, 2448, 127, 4227858432, 2448, 127, 4227858432, 2448, 127, 4227858432] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756433075897414240_122_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756433075897414240_122_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d0241d3b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756433075897414240_122_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,233 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 43))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 23))) { + if 
((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((199 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 330 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 2752, 1, 0, 3904, 0, 1048576, 4864, 268435456, 0, 6080, 268435456, 0, 11776, 1638, 1717977088, 11776, 1638, 1717977088, 11776, 1638, 1717977088, 11776, 1638, 1717977088, 11776, 1638, 1717977088, 11776, 1638, 1717977088, 11776, 1638, 1717977088, 11776, 1638, 1717977088, 11776, 1638, 1717977088, 11776, 1638, 1717977088, 11776, 1638, 1717977088, 11776, 1638, 1717977088, 11776, 1638, 1717977088, 11776, 1638, 1717977088, 11776, 1638, 1717977088, 11792, 1638, 1717977088, 11792, 1638, 1717977088, 11792, 1638, 1717977088, 11792, 1638, 1717977088, 11792, 1638, 1717977088, 11792, 1638, 1717977088, 11792, 1638, 1717977088, 11792, 1638, 1717977088, 11792, 1638, 1717977088, 11792, 1638, 1717977088, 11792, 1638, 1717977088, 11792, 
1638, 1717977088, 11792, 1638, 1717977088, 11792, 1638, 1717977088, 11792, 1638, 1717977088, 12736, 26214, 0, 12736, 26214, 0, 12736, 26214, 0, 12736, 26214, 0, 12736, 26214, 0, 12736, 26214, 0, 12736, 26214, 0, 12736, 26214, 0, 12740, 26214, 0, 12740, 26214, 0, 12740, 26214, 0, 12740, 26214, 0, 12740, 26214, 0, 12740, 26214, 0, 12740, 26214, 0, 12740, 26214, 0, 12744, 26214, 0, 12744, 26214, 0, 12744, 26214, 0, 12744, 26214, 0, 12744, 26214, 0, 12744, 26214, 0, 12744, 26214, 0, 12744, 26214, 0, 12752, 26214, 0, 12752, 26214, 0, 12752, 26214, 0, 12752, 26214, 0, 12752, 26214, 0, 12752, 26214, 0, 12752, 26214, 0, 12752, 26214, 0, 12756, 26214, 0, 12756, 26214, 0, 12756, 26214, 0, 12756, 26214, 0, 12756, 26214, 0, 12756, 26214, 0, 12756, 26214, 0, 12756, 26214, 0, 12760, 26214, 0, 12760, 26214, 0, 12760, 26214, 0, 12760, 26214, 0, 12760, 26214, 0, 12760, 26214, 0, 12760, 26214, 0, 12760, 26214, 0, 13376, 559240, 0, 13376, 559240, 0, 13376, 559240, 0, 13376, 559240, 0, 13376, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756433437785905874_127_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756433437785905874_127_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b70189ef --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756433437785905874_127_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,216 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 32)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((92 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 25)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((99 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 18)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 37)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() 
>= 42))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 765 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2816, 0, 2048, 2820, 0, 2048, 2832, 0, 2048, 2836, 0, 2048, 4416, 272696336, 68174084, 4416, 272696336, 68174084, 4416, 272696336, 68174084, 4416, 272696336, 68174084, 4416, 272696336, 68174084, 4416, 272696336, 68174084, 4416, 272696336, 68174084, 4416, 272696336, 68174084, 4416, 272696336, 68174084, 4416, 272696336, 68174084, 4432, 272696336, 68174084, 4432, 272696336, 68174084, 4432, 272696336, 68174084, 4432, 272696336, 68174084, 4432, 272696336, 68174084, 4432, 272696336, 68174084, 4432, 272696336, 68174084, 4432, 272696336, 68174084, 4432, 272696336, 68174084, 4432, 272696336, 68174084, 5888, 0, 1227133513, 5888, 0, 1227133513, 5888, 0, 1227133513, 5888, 0, 1227133513, 5888, 0, 1227133513, 5888, 0, 1227133513, 5888, 0, 1227133513, 5888, 0, 1227133513, 5888, 0, 1227133513, 5888, 0, 1227133513, 5888, 0, 1227133513, 5892, 0, 1227133513, 5892, 0, 1227133513, 5892, 0, 1227133513, 5892, 0, 1227133513, 5892, 0, 1227133513, 5892, 0, 1227133513, 5892, 0, 1227133513, 5892, 0, 1227133513, 5892, 0, 1227133513, 5892, 0, 1227133513, 5892, 0, 1227133513, 5904, 0, 1227133513, 5904, 0, 1227133513, 5904, 0, 1227133513, 5904, 0, 1227133513, 5904, 0, 1227133513, 5904, 0, 1227133513, 5904, 0, 1227133513, 5904, 0, 1227133513, 5904, 0, 1227133513, 5904, 0, 1227133513, 5904, 0, 1227133513, 5908, 0, 1227133513, 5908, 0, 1227133513, 5908, 0, 1227133513, 5908, 
0, 1227133513, 5908, 0, 1227133513, 5908, 0, 1227133513, 5908, 0, 1227133513, 5908, 0, 1227133513, 5908, 0, 1227133513, 5908, 0, 1227133513, 5908, 0, 1227133513, 6336, 9586980, 0, 6336, 9586980, 0, 6336, 9586980, 0, 6336, 9586980, 0, 6336, 9586980, 0, 6336, 9586980, 0, 6336, 9586980, 0, 6336, 9586980, 0, 6340, 9586980, 0, 6340, 9586980, 0, 6340, 9586980, 0, 6340, 9586980, 0, 6340, 9586980, 0, 6340, 9586980, 0, 6340, 9586980, 0, 6340, 9586980, 0, 6352, 9586980, 0, 6352, 9586980, 0, 6352, 9586980, 0, 6352, 9586980, 0, 6352, 9586980, 0, 6352, 9586980, 0, 6352, 9586980, 0, 6352, 9586980, 0, 6356, 9586980, 0, 6356, 9586980, 0, 6356, 9586980, 0, 6356, 9586980, 0, 6356, 9586980, 0, 6356, 9586980, 0, 6356, 9586980, 0, 6356, 9586980, 0, 11392, 1073741826, 805306384, 11392, 1073741826, 805306384, 11392, 1073741826, 805306384, 11392, 1073741826, 805306384, 11392, 1073741826, 805306384, 11136, 262141, 0, 11136, 262141, 0, 11136, 262141, 0, 11136, 262141, 0, 11136, 262141, 0, 11136, 262141, 0, 11136, 262141, 0, 11136, 262141, 0, 11136, 262141, 0, 11136, 262141, 0, 11136, 262141, 0, 11136, 262141, 0, 11136, 262141, 0, 11136, 262141, 0, 11136, 262141, 0, 11136, 262141, 0, 11136, 262141, 0, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10752, 357826560, 1163220293, 10496, 2097152, 134217856, 10496, 2097152, 134217856, 10496, 2097152, 134217856, 10112, 0, 32, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 
12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12560, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 12576, 524287, 4286578688, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13264, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 
13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272, 13280, 7, 4294966272] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756433512945495768_129_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756433512945495768_129_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9edc27ea --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756433512945495768_129_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,241 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 46))) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 41))) { + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 9)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 25)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 3))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((231 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 3840, 18, 612368384, 3840, 18, 612368384, 3840, 18, 612368384, 3840, 18, 612368384, 3840, 18, 612368384, 4544, 18, 613564416, 4544, 18, 613564416, 4544, 18, 613564416, 4544, 18, 613564416, 4544, 18, 613564416, 4544, 18, 613564416, 4544, 18, 613564416, 4544, 18, 613564416, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11712, 511, 0, 11712, 511, 0, 11712, 
511, 0, 11712, 511, 0, 11712, 511, 0, 11712, 511, 0, 11712, 511, 0, 11712, 511, 0, 11712, 511, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756433566837102913_132_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756433566837102913_132_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..657d44c1 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756433566837102913_132_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,336 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() 
< 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 55)) { + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 60)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 32))) { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 
1)) { + if ((WaveGetLaneIndex() >= 32)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() >= 39)) { + if ((WaveGetLaneIndex() < 17)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 56)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((268 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 147 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 4195328, 0, 1856, 
4195328, 0, 1872, 4195328, 0, 1872, 4195328, 0, 1888, 4195328, 0, 1888, 4195328, 0, 6080, 537002016, 2097664, 6080, 537002016, 2097664, 6080, 537002016, 2097664, 6080, 537002016, 2097664, 6080, 537002016, 2097664, 6400, 572662306, 572662306, 6400, 572662306, 572662306, 6400, 572662306, 572662306, 6400, 572662306, 572662306, 6400, 572662306, 572662306, 6400, 572662306, 572662306, 6400, 572662306, 572662306, 6400, 572662306, 572662306, 6400, 572662306, 572662306, 6400, 572662306, 572662306, 6400, 572662306, 572662306, 6400, 572662306, 572662306, 6400, 572662306, 572662306, 6400, 572662306, 572662306, 6400, 572662306, 572662306, 6400, 572662306, 572662306, 6848, 699050, 0, 6848, 699050, 0, 6848, 699050, 0, 6848, 699050, 0, 6848, 699050, 0, 6848, 699050, 0, 6848, 699050, 0, 6848, 699050, 0, 6848, 699050, 0, 6848, 699050, 0, 7488, 17, 0, 7488, 17, 0, 11712, 64, 0, 14096, 0, 16384, 14112, 0, 16384, 15312, 0, 67108864, 15328, 0, 67108864, 17600, 559240, 0, 17600, 559240, 0, 17600, 559240, 0, 17600, 559240, 0, 17600, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756433748380575631_134_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756433748380575631_134_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2b0b9cab --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756433748380575631_134_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,170 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 34)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((25 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 56)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((32 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 57))) { + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 59)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i3 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1089 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 0, 1431655764, 1600, 0, 1431655764, 1600, 0, 1431655764, 1600, 0, 1431655764, 1600, 0, 1431655764, 1600, 0, 1431655764, 1600, 0, 1431655764, 1600, 0, 1431655764, 1600, 0, 1431655764, 1600, 0, 1431655764, 1600, 0, 1431655764, 1600, 0, 1431655764, 1600, 0, 1431655764, 1600, 0, 1431655764, 1600, 0, 1431655764, 1604, 0, 1431655764, 1604, 0, 1431655764, 1604, 0, 1431655764, 1604, 0, 1431655764, 1604, 0, 1431655764, 1604, 0, 1431655764, 1604, 0, 1431655764, 1604, 0, 1431655764, 1604, 0, 1431655764, 1604, 0, 1431655764, 1604, 0, 1431655764, 1604, 0, 1431655764, 1604, 0, 1431655764, 1604, 0, 1431655764, 1604, 0, 1431655764, 1608, 0, 1431655764, 1608, 0, 1431655764, 1608, 0, 1431655764, 1608, 0, 1431655764, 1608, 0, 1431655764, 1608, 0, 1431655764, 1608, 0, 1431655764, 1608, 0, 1431655764, 1608, 0, 1431655764, 1608, 0, 1431655764, 1608, 0, 1431655764, 1608, 0, 1431655764, 1608, 0, 1431655764, 1608, 0, 
1431655764, 1608, 0, 1431655764, 1616, 0, 1431655764, 1616, 0, 1431655764, 1616, 0, 1431655764, 1616, 0, 1431655764, 1616, 0, 1431655764, 1616, 0, 1431655764, 1616, 0, 1431655764, 1616, 0, 1431655764, 1616, 0, 1431655764, 1616, 0, 1431655764, 1616, 0, 1431655764, 1616, 0, 1431655764, 1616, 0, 1431655764, 1616, 0, 1431655764, 1616, 0, 1431655764, 1620, 0, 1431655764, 1620, 0, 1431655764, 1620, 0, 1431655764, 1620, 0, 1431655764, 1620, 0, 1431655764, 1620, 0, 1431655764, 1620, 0, 1431655764, 1620, 0, 1431655764, 1620, 0, 1431655764, 1620, 0, 1431655764, 1620, 0, 1431655764, 1620, 0, 1431655764, 1620, 0, 1431655764, 1620, 0, 1431655764, 1620, 0, 1431655764, 1624, 0, 1431655764, 1624, 0, 1431655764, 1624, 0, 1431655764, 1624, 0, 1431655764, 1624, 0, 1431655764, 1624, 0, 1431655764, 1624, 0, 1431655764, 1624, 0, 1431655764, 1624, 0, 1431655764, 1624, 0, 1431655764, 1624, 0, 1431655764, 1624, 0, 1431655764, 1624, 0, 1431655764, 1624, 0, 1431655764, 1624, 0, 1431655764, 1632, 0, 1431655764, 1632, 0, 1431655764, 1632, 0, 1431655764, 1632, 0, 1431655764, 1632, 0, 1431655764, 1632, 0, 1431655764, 1632, 0, 1431655764, 1632, 0, 1431655764, 1632, 0, 1431655764, 1632, 0, 1431655764, 1632, 0, 1431655764, 1632, 0, 1431655764, 1632, 0, 1431655764, 1632, 0, 1431655764, 1632, 0, 1431655764, 1636, 0, 1431655764, 1636, 0, 1431655764, 1636, 0, 1431655764, 1636, 0, 1431655764, 1636, 0, 1431655764, 1636, 0, 1431655764, 1636, 0, 1431655764, 1636, 0, 1431655764, 1636, 0, 1431655764, 1636, 0, 1431655764, 1636, 0, 1431655764, 1636, 0, 1431655764, 1636, 0, 1431655764, 1636, 0, 1431655764, 1636, 0, 1431655764, 1640, 0, 1431655764, 1640, 0, 1431655764, 1640, 0, 1431655764, 1640, 0, 1431655764, 1640, 0, 1431655764, 1640, 0, 1431655764, 1640, 0, 1431655764, 1640, 0, 1431655764, 1640, 0, 1431655764, 1640, 0, 1431655764, 1640, 0, 1431655764, 1640, 0, 1431655764, 1640, 0, 1431655764, 1640, 0, 1431655764, 1640, 0, 1431655764, 2048, 0, 1426063360, 2048, 0, 1426063360, 2048, 0, 1426063360, 2048, 0, 
1426063360, 2052, 0, 1426063360, 2052, 0, 1426063360, 2052, 0, 1426063360, 2052, 0, 1426063360, 2056, 0, 1426063360, 2056, 0, 1426063360, 2056, 0, 1426063360, 2056, 0, 1426063360, 2064, 0, 1426063360, 2064, 0, 1426063360, 2064, 0, 1426063360, 2064, 0, 1426063360, 2068, 0, 1426063360, 2068, 0, 1426063360, 2068, 0, 1426063360, 2068, 0, 1426063360, 2072, 0, 1426063360, 2072, 0, 1426063360, 2072, 0, 1426063360, 2072, 0, 1426063360, 2080, 0, 1426063360, 2080, 0, 1426063360, 2080, 0, 1426063360, 2080, 0, 1426063360, 2084, 0, 1426063360, 2084, 0, 1426063360, 2084, 0, 1426063360, 2084, 0, 1426063360, 2088, 0, 1426063360, 2088, 0, 1426063360, 2088, 0, 1426063360, 2088, 0, 1426063360, 3200, 268435456, 64, 3200, 268435456, 64, 3216, 268435456, 64, 3216, 268435456, 64, 3232, 268435456, 64, 3232, 268435456, 64, 7360, 0, 33554432, 7808, 32, 0, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8896, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 
2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8912, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 8928, 2863311530, 2829757098, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 
1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9472, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9488, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 
1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 9504, 1431655509, 1431654741, 10048, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756434036625132057_140_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756434036625132057_140_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..395e67a5 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756434036625132057_140_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,238 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 46))) { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 42)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 44)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() 
== 28)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 47))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveSum(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 183 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 127, 3221225472, 1088, 127, 3221225472, 1088, 127, 3221225472, 1088, 127, 3221225472, 1088, 127, 3221225472, 1088, 127, 3221225472, 1088, 127, 3221225472, 1088, 127, 3221225472, 1088, 127, 3221225472, 1728, 17, 0, 1728, 17, 0, 2624, 68, 1145323520, 2624, 68, 1145323520, 2624, 68, 1145323520, 2624, 68, 1145323520, 2624, 68, 1145323520, 2624, 68, 1145323520, 2624, 68, 1145323520, 5904, 0, 134217728, 5920, 0, 134217728, 6848, 127, 4026531840, 6848, 
127, 4026531840, 6848, 127, 4026531840, 6848, 127, 4026531840, 6848, 127, 4026531840, 6848, 127, 4026531840, 6848, 127, 4026531840, 6848, 127, 4026531840, 6848, 127, 4026531840, 6848, 127, 4026531840, 6848, 127, 4026531840, 7552, 512, 2, 7552, 512, 2, 9856, 68174080, 4161, 9856, 68174080, 4161, 9856, 68174080, 4161, 9856, 68174080, 4161, 9856, 68174080, 4161, 9856, 68174080, 4161, 9856, 68174080, 4161, 10432, 1431655680, 5461, 10432, 1431655680, 5461, 10432, 1431655680, 5461, 10432, 1431655680, 5461, 10432, 1431655680, 5461, 10432, 1431655680, 5461, 10432, 1431655680, 5461, 10432, 1431655680, 5461, 10432, 1431655680, 5461, 10432, 1431655680, 5461, 10432, 1431655680, 5461, 10432, 1431655680, 5461, 10432, 1431655680, 5461, 10432, 1431655680, 5461, 10432, 1431655680, 5461, 10432, 1431655680, 5461, 10432, 1431655680, 5461, 10432, 1431655680, 5461, 10432, 1431655680, 5461, 11136, 128, 64, 11136, 128, 64] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756434039015558645_141_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756434039015558645_141_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cef42c1c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756434039015558645_141_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,175 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 48))) { + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 46)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((68 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + if ((WaveGetLaneIndex() >= 59)) { + if ((WaveGetLaneIndex() >= 46)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((141 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + uint counter4 = 0; + 
while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 39)) { + if ((WaveGetLaneIndex() == 44)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 28)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 447 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3088, 0, 128, 3092, 0, 128, 3096, 0, 128, 3104, 0, 128, 3108, 0, 128, 3112, 0, 128, 4368, 0, 16384, 4372, 0, 16384, 4376, 0, 16384, 4384, 0, 16384, 4388, 0, 16384, 4392, 0, 16384, 5072, 8, 4, 5072, 8, 4, 5076, 8, 4, 5076, 8, 4, 5080, 8, 4, 5080, 8, 4, 5088, 8, 4, 5088, 8, 4, 5092, 8, 4, 5092, 8, 4, 5096, 8, 4, 5096, 8, 4, 5888, 0, 4160749568, 5888, 0, 4160749568, 5888, 0, 4160749568, 5888, 0, 4160749568, 5888, 0, 4160749568, 9028, 0, 536870912, 9032, 0, 536870912, 9036, 0, 536870912, 9044, 0, 536870912, 9048, 0, 536870912, 9052, 
0, 536870912, 9060, 0, 536870912, 9064, 0, 536870912, 9068, 0, 536870912, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10320, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10336, 
262143, 4294959104, 10336, 262143, 4294959104, 10336, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104, 10352, 262143, 4294959104] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756434091721620827_142_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756434091721620827_142_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..17c5e268 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756434091721620827_142_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,274 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 41)) { + if ((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter1 = 0; + while 
((counter1 < 2)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 45))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + 
if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((179 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 60))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((205 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((214 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i3 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 420 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 0, 2454257664, 768, 0, 2454257664, 768, 0, 2454257664, 768, 0, 2454257664, 768, 0, 2454257664, 768, 0, 2454257664, 2112, 0, 2415919104, 2112, 0, 2415919104, 2128, 0, 2415919104, 2128, 0, 2415919104, 2144, 0, 2415919104, 2144, 0, 2415919104, 2816, 0, 2449473536, 2816, 0, 2449473536, 2816, 0, 2449473536, 2832, 0, 2449473536, 2832, 0, 2449473536, 2832, 0, 2449473536, 2848, 0, 
2449473536, 2848, 0, 2449473536, 2848, 0, 2449473536, 4496, 272696336, 68174084, 4496, 272696336, 68174084, 4496, 272696336, 68174084, 4496, 272696336, 68174084, 4496, 272696336, 68174084, 4496, 272696336, 68174084, 4496, 272696336, 68174084, 4496, 272696336, 68174084, 4496, 272696336, 68174084, 4496, 272696336, 68174084, 4500, 272696336, 68174084, 4500, 272696336, 68174084, 4500, 272696336, 68174084, 4500, 272696336, 68174084, 4500, 272696336, 68174084, 4500, 272696336, 68174084, 4500, 272696336, 68174084, 4500, 272696336, 68174084, 4500, 272696336, 68174084, 4500, 272696336, 68174084, 4504, 272696336, 68174084, 4504, 272696336, 68174084, 4504, 272696336, 68174084, 4504, 272696336, 68174084, 4504, 272696336, 68174084, 4504, 272696336, 68174084, 4504, 272696336, 68174084, 4504, 272696336, 68174084, 4504, 272696336, 68174084, 4504, 272696336, 68174084, 4512, 272696336, 68174084, 4512, 272696336, 68174084, 4512, 272696336, 68174084, 4512, 272696336, 68174084, 4512, 272696336, 68174084, 4512, 272696336, 68174084, 4512, 272696336, 68174084, 4512, 272696336, 68174084, 4512, 272696336, 68174084, 4512, 272696336, 68174084, 4516, 272696336, 68174084, 4516, 272696336, 68174084, 4516, 272696336, 68174084, 4516, 272696336, 68174084, 4516, 272696336, 68174084, 4516, 272696336, 68174084, 4516, 272696336, 68174084, 4516, 272696336, 68174084, 4516, 272696336, 68174084, 4516, 272696336, 68174084, 4520, 272696336, 68174084, 4520, 272696336, 68174084, 4520, 272696336, 68174084, 4520, 272696336, 68174084, 4520, 272696336, 68174084, 4520, 272696336, 68174084, 4520, 272696336, 68174084, 4520, 272696336, 68174084, 4520, 272696336, 68174084, 4520, 272696336, 68174084, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 
613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 8832, 17, 0, 8832, 17, 0, 9728, 1145324612, 1145324612, 9728, 1145324612, 1145324612, 9728, 1145324612, 1145324612, 9728, 1145324612, 1145324612, 9728, 1145324612, 1145324612, 9728, 1145324612, 1145324612, 9728, 1145324612, 1145324612, 9728, 1145324612, 1145324612, 9728, 1145324612, 1145324612, 9728, 1145324612, 1145324612, 9728, 1145324612, 1145324612, 9728, 1145324612, 1145324612, 9728, 1145324612, 1145324612, 9728, 1145324612, 1145324612, 9728, 1145324612, 1145324612, 9728, 1145324612, 1145324612, 11456, 136, 2281701376, 11456, 136, 2281701376, 11456, 136, 2281701376, 11456, 136, 2281701376, 11460, 136, 2281701376, 11460, 136, 2281701376, 11460, 136, 2281701376, 11460, 136, 2281701376, 11472, 136, 2281701376, 11472, 136, 2281701376, 11472, 136, 2281701376, 11472, 136, 2281701376, 11476, 136, 2281701376, 11476, 136, 2281701376, 11476, 136, 2281701376, 11476, 136, 2281701376, 13120, 8, 0, 13124, 8, 0, 13136, 8, 0, 13140, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756434231804913099_144_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756434231804913099_144_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1dd600ae --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756434231804913099_144_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,248 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 62))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 55)) { + result = (result + WaveActiveMax(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 27)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 30)) { + if ((WaveGetLaneIndex() >= 54)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((118 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((134 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((143 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 54)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 53)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 900 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1168, 128, 0, 1184, 128, 0, 4432, 0, 8388608, 4448, 0, 8388608, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5952, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 
5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 5968, 134217727, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6912, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6916, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 
6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6928, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 6932, 1073741823, 0, 8576, 715827882, 0, 8576, 715827882, 0, 8576, 715827882, 0, 8576, 715827882, 0, 8576, 715827882, 0, 8576, 715827882, 0, 8576, 715827882, 0, 8576, 715827882, 0, 8576, 715827882, 0, 8576, 715827882, 0, 8576, 715827882, 0, 8576, 715827882, 0, 8576, 715827882, 0, 8576, 715827882, 0, 8576, 715827882, 0, 8580, 715827882, 0, 8580, 715827882, 0, 8580, 715827882, 0, 8580, 715827882, 0, 8580, 715827882, 0, 8580, 715827882, 0, 8580, 715827882, 0, 8580, 715827882, 0, 8580, 715827882, 0, 8580, 715827882, 0, 8580, 715827882, 0, 8580, 715827882, 0, 8580, 715827882, 0, 8580, 715827882, 0, 8580, 715827882, 0, 8592, 715827882, 0, 8592, 715827882, 0, 8592, 715827882, 0, 8592, 715827882, 0, 8592, 715827882, 0, 8592, 715827882, 0, 8592, 715827882, 0, 8592, 715827882, 0, 8592, 715827882, 0, 8592, 715827882, 0, 8592, 715827882, 0, 8592, 715827882, 0, 8592, 715827882, 0, 8592, 715827882, 0, 8592, 715827882, 0, 8596, 715827882, 0, 8596, 715827882, 0, 8596, 715827882, 0, 8596, 715827882, 0, 8596, 715827882, 0, 8596, 715827882, 0, 8596, 715827882, 0, 8596, 715827882, 0, 8596, 715827882, 0, 8596, 715827882, 0, 8596, 715827882, 0, 8596, 715827882, 0, 8596, 715827882, 0, 
8596, 715827882, 0, 8596, 715827882, 0, 9152, 715827882, 0, 9152, 715827882, 0, 9152, 715827882, 0, 9152, 715827882, 0, 9152, 715827882, 0, 9152, 715827882, 0, 9152, 715827882, 0, 9152, 715827882, 0, 9152, 715827882, 0, 9152, 715827882, 0, 9152, 715827882, 0, 9152, 715827882, 0, 9152, 715827882, 0, 9152, 715827882, 0, 9152, 715827882, 0, 9156, 715827882, 0, 9156, 715827882, 0, 9156, 715827882, 0, 9156, 715827882, 0, 9156, 715827882, 0, 9156, 715827882, 0, 9156, 715827882, 0, 9156, 715827882, 0, 9156, 715827882, 0, 9156, 715827882, 0, 9156, 715827882, 0, 9156, 715827882, 0, 9156, 715827882, 0, 9156, 715827882, 0, 9156, 715827882, 0, 9168, 715827882, 0, 9168, 715827882, 0, 9168, 715827882, 0, 9168, 715827882, 0, 9168, 715827882, 0, 9168, 715827882, 0, 9168, 715827882, 0, 9168, 715827882, 0, 9168, 715827882, 0, 9168, 715827882, 0, 9168, 715827882, 0, 9168, 715827882, 0, 9168, 715827882, 0, 9168, 715827882, 0, 9168, 715827882, 0, 9172, 715827882, 0, 9172, 715827882, 0, 9172, 715827882, 0, 9172, 715827882, 0, 9172, 715827882, 0, 9172, 715827882, 0, 9172, 715827882, 0, 9172, 715827882, 0, 9172, 715827882, 0, 9172, 715827882, 0, 9172, 715827882, 0, 9172, 715827882, 0, 9172, 715827882, 0, 9172, 715827882, 0, 9172, 715827882, 0, 12560, 0, 4194304, 12576, 0, 4194304] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756434316757443982_145_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756434316757443982_145_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1ae31c3d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756434316757443982_145_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,70 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 
4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1232, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528, 1248, 1048575, 4294934528] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + 
VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756434699728096483_147_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756434699728096483_147_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ea8f1f42 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756434699728096483_147_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,174 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 32)) { + if ((WaveGetLaneIndex() >= 60)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + } + } + case 1: { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 61))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 30)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 49)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 396 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3584, 219, 2147483648, 3584, 219, 2147483648, 3584, 219, 2147483648, 3584, 219, 2147483648, 3584, 219, 2147483648, 3584, 219, 2147483648, 3584, 219, 2147483648, 5328, 2317910016, 42608802, 5328, 2317910016, 42608802, 5328, 2317910016, 42608802, 5328, 2317910016, 42608802, 5328, 2317910016, 42608802, 5328, 2317910016, 42608802, 5328, 2317910016, 42608802, 5328, 2317910016, 42608802, 5328, 2317910016, 42608802, 5328, 2317910016, 42608802, 5328, 2317910016, 42608802, 5328, 2317910016, 42608802, 5328, 2317910016, 42608802, 5328, 2317910016, 42608802, 5328, 2317910016, 42608802, 5344, 2317910016, 42608802, 5344, 2317910016, 42608802, 5344, 2317910016, 42608802, 5344, 2317910016, 42608802, 5344, 2317910016, 42608802, 5344, 2317910016, 42608802, 5344, 2317910016, 42608802, 5344, 2317910016, 42608802, 5344, 2317910016, 42608802, 5344, 2317910016, 42608802, 5344, 2317910016, 42608802, 5344, 2317910016, 42608802, 5344, 2317910016, 42608802, 5344, 2317910016, 42608802, 5344, 2317910016, 42608802, 5360, 2317910016, 42608802, 5360, 2317910016, 42608802, 5360, 2317910016, 42608802, 5360, 2317910016, 42608802, 5360, 2317910016, 42608802, 5360, 2317910016, 42608802, 5360, 
2317910016, 42608802, 5360, 2317910016, 42608802, 5360, 2317910016, 42608802, 5360, 2317910016, 42608802, 5360, 2317910016, 42608802, 5360, 2317910016, 42608802, 5360, 2317910016, 42608802, 5360, 2317910016, 42608802, 5360, 2317910016, 42608802, 5904, 2317910016, 42608802, 5904, 2317910016, 42608802, 5904, 2317910016, 42608802, 5904, 2317910016, 42608802, 5904, 2317910016, 42608802, 5904, 2317910016, 42608802, 5904, 2317910016, 42608802, 5904, 2317910016, 42608802, 5904, 2317910016, 42608802, 5904, 2317910016, 42608802, 5904, 2317910016, 42608802, 5904, 2317910016, 42608802, 5904, 2317910016, 42608802, 5904, 2317910016, 42608802, 5904, 2317910016, 42608802, 5920, 2317910016, 42608802, 5920, 2317910016, 42608802, 5920, 2317910016, 42608802, 5920, 2317910016, 42608802, 5920, 2317910016, 42608802, 5920, 2317910016, 42608802, 5920, 2317910016, 42608802, 5920, 2317910016, 42608802, 5920, 2317910016, 42608802, 5920, 2317910016, 42608802, 5920, 2317910016, 42608802, 5920, 2317910016, 42608802, 5920, 2317910016, 42608802, 5920, 2317910016, 42608802, 5920, 2317910016, 42608802, 5936, 2317910016, 42608802, 5936, 2317910016, 42608802, 5936, 2317910016, 42608802, 5936, 2317910016, 42608802, 5936, 2317910016, 42608802, 5936, 2317910016, 42608802, 5936, 2317910016, 42608802, 5936, 2317910016, 42608802, 5936, 2317910016, 42608802, 5936, 2317910016, 42608802, 5936, 2317910016, 42608802, 5936, 2317910016, 42608802, 5936, 2317910016, 42608802, 5936, 2317910016, 42608802, 5936, 2317910016, 42608802, 6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 
6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 6208, 613566756, 1227133513, 7104, 16383, 0, 7104, 16383, 0, 7104, 16383, 0, 7104, 16383, 0, 7104, 16383, 0, 7104, 16383, 0, 7104, 16383, 0, 7104, 16383, 0, 7104, 16383, 0, 7104, 16383, 0, 7104, 16383, 0, 7104, 16383, 0, 7104, 16383, 0, 7104, 16383, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756434787208182272_152_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756434787208182272_152_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..67e21db7 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756434787208182272_152_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,188 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while 
((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 60)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 41)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 
== 1)) { + break; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 33)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + 
Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6336, 17, 0, 6336, 17, 0, 6912, 286331153, 286331153, 6912, 286331153, 286331153, 6912, 286331153, 286331153, 6912, 286331153, 286331153, 6912, 286331153, 286331153, 6912, 286331153, 286331153, 6912, 286331153, 286331153, 6912, 286331153, 286331153, 6912, 286331153, 286331153, 6912, 286331153, 286331153, 6912, 286331153, 286331153, 6912, 286331153, 286331153, 6912, 286331153, 286331153, 6912, 286331153, 286331153, 6912, 286331153, 286331153, 6912, 286331153, 286331153, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 2004318071, 7232, 2004318071, 
2004318071, 8144, 0, 2290649224, 8144, 0, 2290649224, 8144, 0, 2290649224, 8144, 0, 2290649224, 8144, 0, 2290649224, 8144, 0, 2290649224, 8144, 0, 2290649224, 8144, 0, 2290649224] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756434988352842784_155_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756434988352842784_155_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..315433b1 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756434988352842784_155_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,167 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 53)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 34)) { + if ((WaveGetLaneIndex() >= 48)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((89 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((104 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 312 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3648, 73, 0, 3648, 73, 0, 3648, 73, 0, 5712, 0, 3067805696, 5712, 0, 3067805696, 5712, 0, 3067805696, 5712, 0, 3067805696, 5712, 0, 3067805696, 5712, 0, 3067805696, 5712, 0, 3067805696, 5712, 0, 3067805696, 5712, 0, 3067805696, 5712, 0, 3067805696, 5712, 0, 3067805696, 5716, 0, 3067805696, 5716, 0, 3067805696, 5716, 0, 3067805696, 5716, 0, 3067805696, 5716, 0, 3067805696, 5716, 0, 3067805696, 5716, 0, 3067805696, 5716, 0, 3067805696, 5716, 0, 3067805696, 5716, 0, 3067805696, 5716, 0, 3067805696, 5720, 0, 3067805696, 5720, 0, 3067805696, 5720, 0, 3067805696, 5720, 0, 3067805696, 5720, 0, 3067805696, 5720, 0, 3067805696, 5720, 0, 3067805696, 5720, 0, 3067805696, 5720, 0, 3067805696, 5720, 0, 3067805696, 5720, 0, 
3067805696, 5728, 0, 3067805696, 5728, 0, 3067805696, 5728, 0, 3067805696, 5728, 0, 3067805696, 5728, 0, 3067805696, 5728, 0, 3067805696, 5728, 0, 3067805696, 5728, 0, 3067805696, 5728, 0, 3067805696, 5728, 0, 3067805696, 5728, 0, 3067805696, 5732, 0, 3067805696, 5732, 0, 3067805696, 5732, 0, 3067805696, 5732, 0, 3067805696, 5732, 0, 3067805696, 5732, 0, 3067805696, 5732, 0, 3067805696, 5732, 0, 3067805696, 5732, 0, 3067805696, 5732, 0, 3067805696, 5732, 0, 3067805696, 5736, 0, 3067805696, 5736, 0, 3067805696, 5736, 0, 3067805696, 5736, 0, 3067805696, 5736, 0, 3067805696, 5736, 0, 3067805696, 5736, 0, 3067805696, 5736, 0, 3067805696, 5736, 0, 3067805696, 5736, 0, 3067805696, 5736, 0, 3067805696, 6672, 4194816, 0, 6672, 4194816, 0, 6676, 4194816, 0, 6676, 4194816, 0, 6680, 4194816, 0, 6680, 4194816, 0, 6688, 4194816, 0, 6688, 4194816, 0, 6692, 4194816, 0, 6692, 4194816, 0, 6696, 4194816, 0, 6696, 4194816, 0, 7632, 0, 128, 7648, 0, 128, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513, 7936, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435033531912412_156_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435033531912412_156_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e3d04835 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435033531912412_156_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,252 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() < 26)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 49))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 34)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 25)) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() >= 33)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 213 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 9, 0, 768, 9, 0, 2112, 9, 0, 2112, 9, 0, 2128, 9, 0, 2128, 9, 0, 2144, 9, 0, 2144, 9, 0, 2816, 9, 0, 2816, 9, 0, 2832, 9, 0, 2832, 9, 0, 2848, 9, 0, 2848, 9, 0, 4240, 0, 2048, 4256, 0, 2048, 5392, 18, 612368384, 5392, 18, 612368384, 5392, 18, 612368384, 5392, 18, 612368384, 5392, 18, 612368384, 5408, 18, 612368384, 5408, 18, 612368384, 5408, 18, 612368384, 5408, 18, 612368384, 5408, 18, 612368384, 6096, 1170, 613416960, 6096, 1170, 613416960, 6096, 1170, 613416960, 6096, 1170, 613416960, 6096, 1170, 613416960, 6096, 1170, 613416960, 6096, 1170, 613416960, 6096, 1170, 
613416960, 6112, 1170, 613416960, 6112, 1170, 613416960, 6112, 1170, 613416960, 6112, 1170, 613416960, 6112, 1170, 613416960, 6112, 1170, 613416960, 6112, 1170, 613416960, 6112, 1170, 613416960, 6544, 0, 4, 6560, 0, 4, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 8448, 5, 0, 8448, 5, 0, 8464, 5, 0, 8464, 5, 0, 9808, 268435456, 0, 9824, 268435456, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435051481582940_157_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435051481582940_157_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e5ac2c60 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435051481582940_157_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,277 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 61))) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 1)) { + if ((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 56)) { + if ((WaveGetLaneIndex() < 24)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 37)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 36)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 231 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 585, 2147483648, 1280, 585, 2147483648, 1280, 585, 2147483648, 1280, 585, 2147483648, 1280, 585, 2147483648, 1920, 65, 0, 1920, 65, 0, 3456, 585, 2147483648, 3456, 585, 2147483648, 3456, 585, 2147483648, 3456, 585, 2147483648, 3456, 585, 2147483648, 4032, 272696336, 68174084, 4032, 272696336, 68174084, 4032, 272696336, 68174084, 
4032, 272696336, 68174084, 4032, 272696336, 68174084, 4032, 272696336, 68174084, 4032, 272696336, 68174084, 4032, 272696336, 68174084, 4032, 272696336, 68174084, 4032, 272696336, 68174084, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 4352, 613566756, 1227133513, 11648, 73, 0, 11648, 73, 0, 11648, 73, 0, 12224, 272696336, 68174084, 12224, 272696336, 68174084, 12224, 272696336, 68174084, 12224, 272696336, 68174084, 12224, 272696336, 68174084, 12224, 272696336, 68174084, 12224, 272696336, 68174084, 12224, 272696336, 68174084, 12224, 272696336, 68174084, 12224, 272696336, 68174084, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - 
Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435054383921927_158_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435054383921927_158_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b3182e5c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435054383921927_158_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + 
Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435054610259772_159_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435054610259772_159_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..221c4d31 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435054610259772_159_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,258 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 33)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 57))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() 
== 5) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((125 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if ((WaveGetLaneIndex() >= 40)) { + if ((WaveGetLaneIndex() >= 58)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 53)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 39)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # 
Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 231 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 2432, 0, 524288, 2448, 0, 524288, 4224, 0, 2281701376, 4224, 0, 2281701376, 4240, 0, 2281701376, 4240, 0, 2281701376, 4928, 0, 2281701376, 4928, 0, 2281701376, 4944, 0, 2281701376, 4944, 0, 2281701376, 12096, 0, 1430257664, 12096, 0, 1430257664, 12096, 0, 1430257664, 12096, 0, 1430257664, 12096, 0, 1430257664, 13568, 0, 1431655680, 13568, 0, 1431655680, 13568, 0, 1431655680, 13568, 0, 1431655680, 13568, 0, 1431655680, 13568, 0, 1431655680, 13568, 0, 1431655680, 13568, 0, 1431655680, 13568, 0, 1431655680, 13568, 0, 1431655680, 13568, 0, 1431655680, 13568, 0, 1431655680, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 
1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765, 14144, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435067431978884_160_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435067431978884_160_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0b85e6ca --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435067431978884_160_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,266 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() 
& 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 43)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 63))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((96 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 19)) { + if ((WaveGetLaneIndex() < 30)) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 45))) { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 50)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 414 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4992, 0, 286330880, 4992, 0, 286330880, 4992, 0, 286330880, 4992, 0, 286330880, 4992, 0, 286330880, 5008, 0, 286330880, 5008, 0, 286330880, 5008, 0, 286330880, 5008, 0, 286330880, 5008, 0, 286330880, 5024, 0, 286330880, 5024, 0, 286330880, 5024, 0, 286330880, 5024, 0, 286330880, 5024, 0, 286330880, 6148, 1, 0, 6152, 1, 0, 6164, 1, 0, 6168, 1, 0, 6180, 1, 0, 6184, 1, 0, 6980, 4369, 286330880, 6980, 4369, 286330880, 6980, 4369, 286330880, 6980, 4369, 286330880, 6980, 4369, 286330880, 6980, 4369, 286330880, 6980, 4369, 286330880, 6980, 4369, 286330880, 6980, 4369, 286330880, 6984, 4369, 286330880, 6984, 4369, 286330880, 6984, 4369, 286330880, 6984, 4369, 286330880, 6984, 4369, 286330880, 6984, 4369, 286330880, 6984, 4369, 286330880, 6984, 4369, 286330880, 6984, 4369, 286330880, 6996, 4369, 286330880, 6996, 4369, 286330880, 6996, 4369, 286330880, 6996, 4369, 286330880, 6996, 4369, 286330880, 6996, 4369, 286330880, 6996, 4369, 286330880, 6996, 4369, 286330880, 6996, 4369, 286330880, 7000, 4369, 286330880, 7000, 4369, 286330880, 7000, 4369, 286330880, 7000, 4369, 286330880, 7000, 4369, 286330880, 7000, 4369, 286330880, 7000, 4369, 286330880, 7000, 4369, 286330880, 7000, 4369, 286330880, 7012, 4369, 286330880, 7012, 4369, 286330880, 7012, 4369, 286330880, 7012, 4369, 286330880, 7012, 4369, 286330880, 7012, 4369, 
286330880, 7012, 4369, 286330880, 7012, 4369, 286330880, 7012, 4369, 286330880, 7016, 4369, 286330880, 7016, 4369, 286330880, 7016, 4369, 286330880, 7016, 4369, 286330880, 7016, 4369, 286330880, 7016, 4369, 286330880, 7016, 4369, 286330880, 7016, 4369, 286330880, 7016, 4369, 286330880, 9152, 0, 33554432, 9472, 1145324612, 1145324612, 9472, 1145324612, 1145324612, 9472, 1145324612, 1145324612, 9472, 1145324612, 1145324612, 9472, 1145324612, 1145324612, 9472, 1145324612, 1145324612, 9472, 1145324612, 1145324612, 9472, 1145324612, 1145324612, 9472, 1145324612, 1145324612, 9472, 1145324612, 1145324612, 9472, 1145324612, 1145324612, 9472, 1145324612, 1145324612, 9472, 1145324612, 1145324612, 9472, 1145324612, 1145324612, 9472, 1145324612, 1145324612, 9472, 1145324612, 1145324612, 10112, 34952, 0, 10112, 34952, 0, 10112, 34952, 0, 10112, 34952, 0, 11264, 2184, 0, 11264, 2184, 0, 11264, 2184, 0, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 14336, 2863311530, 2863311530, 13952, 0, 67108864, 13696, 0, 1364459520, 13696, 0, 1364459520, 13696, 0, 1364459520, 13696, 0, 
1364459520, 13696, 0, 1364459520, 13696, 0, 1364459520] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435106749484398_162_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435106749484398_162_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c21f67d9 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435106749484398_162_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,302 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((68 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((77 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 
21)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 27)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 58))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 58))) { + result = (result + 
WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((218 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 55))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 63))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 46))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 765 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2816, 1145324612, 1145324612, 2816, 1145324612, 1145324612, 2816, 1145324612, 1145324612, 2816, 1145324612, 1145324612, 2816, 1145324612, 1145324612, 2816, 1145324612, 1145324612, 2816, 1145324612, 1145324612, 2816, 1145324612, 1145324612, 2816, 1145324612, 1145324612, 2816, 1145324612, 1145324612, 2816, 1145324612, 1145324612, 2816, 1145324612, 1145324612, 2816, 1145324612, 1145324612, 2816, 1145324612, 1145324612, 2816, 
1145324612, 1145324612, 2816, 1145324612, 1145324612, 4356, 2290649224, 2290649224, 4356, 2290649224, 2290649224, 4356, 2290649224, 2290649224, 4356, 2290649224, 2290649224, 4356, 2290649224, 2290649224, 4356, 2290649224, 2290649224, 4356, 2290649224, 2290649224, 4356, 2290649224, 2290649224, 4356, 2290649224, 2290649224, 4356, 2290649224, 2290649224, 4356, 2290649224, 2290649224, 4356, 2290649224, 2290649224, 4356, 2290649224, 2290649224, 4356, 2290649224, 2290649224, 4356, 2290649224, 2290649224, 4356, 2290649224, 2290649224, 4360, 2290649224, 2290649224, 4360, 2290649224, 2290649224, 4360, 2290649224, 2290649224, 4360, 2290649224, 2290649224, 4360, 2290649224, 2290649224, 4360, 2290649224, 2290649224, 4360, 2290649224, 2290649224, 4360, 2290649224, 2290649224, 4360, 2290649224, 2290649224, 4360, 2290649224, 2290649224, 4360, 2290649224, 2290649224, 4360, 2290649224, 2290649224, 4360, 2290649224, 2290649224, 4360, 2290649224, 2290649224, 4360, 2290649224, 2290649224, 4360, 2290649224, 2290649224, 4372, 2290649224, 2290649224, 4372, 2290649224, 2290649224, 4372, 2290649224, 2290649224, 4372, 2290649224, 2290649224, 4372, 2290649224, 2290649224, 4372, 2290649224, 2290649224, 4372, 2290649224, 2290649224, 4372, 2290649224, 2290649224, 4372, 2290649224, 2290649224, 4372, 2290649224, 2290649224, 4372, 2290649224, 2290649224, 4372, 2290649224, 2290649224, 4372, 2290649224, 2290649224, 4372, 2290649224, 2290649224, 4372, 2290649224, 2290649224, 4372, 2290649224, 2290649224, 4376, 2290649224, 2290649224, 4376, 2290649224, 2290649224, 4376, 2290649224, 2290649224, 4376, 2290649224, 2290649224, 4376, 2290649224, 2290649224, 4376, 2290649224, 2290649224, 4376, 2290649224, 2290649224, 4376, 2290649224, 2290649224, 4376, 2290649224, 2290649224, 4376, 2290649224, 2290649224, 4376, 2290649224, 2290649224, 4376, 2290649224, 2290649224, 4376, 2290649224, 2290649224, 4376, 2290649224, 2290649224, 4376, 2290649224, 2290649224, 4376, 2290649224, 2290649224, 4932, 2290649224, 
2290649224, 4932, 2290649224, 2290649224, 4932, 2290649224, 2290649224, 4932, 2290649224, 2290649224, 4932, 2290649224, 2290649224, 4932, 2290649224, 2290649224, 4932, 2290649224, 2290649224, 4932, 2290649224, 2290649224, 4932, 2290649224, 2290649224, 4932, 2290649224, 2290649224, 4932, 2290649224, 2290649224, 4932, 2290649224, 2290649224, 4932, 2290649224, 2290649224, 4932, 2290649224, 2290649224, 4932, 2290649224, 2290649224, 4932, 2290649224, 2290649224, 4936, 2290649224, 2290649224, 4936, 2290649224, 2290649224, 4936, 2290649224, 2290649224, 4936, 2290649224, 2290649224, 4936, 2290649224, 2290649224, 4936, 2290649224, 2290649224, 4936, 2290649224, 2290649224, 4936, 2290649224, 2290649224, 4936, 2290649224, 2290649224, 4936, 2290649224, 2290649224, 4936, 2290649224, 2290649224, 4936, 2290649224, 2290649224, 4936, 2290649224, 2290649224, 4936, 2290649224, 2290649224, 4936, 2290649224, 2290649224, 4936, 2290649224, 2290649224, 4948, 2290649224, 2290649224, 4948, 2290649224, 2290649224, 4948, 2290649224, 2290649224, 4948, 2290649224, 2290649224, 4948, 2290649224, 2290649224, 4948, 2290649224, 2290649224, 4948, 2290649224, 2290649224, 4948, 2290649224, 2290649224, 4948, 2290649224, 2290649224, 4948, 2290649224, 2290649224, 4948, 2290649224, 2290649224, 4948, 2290649224, 2290649224, 4948, 2290649224, 2290649224, 4948, 2290649224, 2290649224, 4948, 2290649224, 2290649224, 4948, 2290649224, 2290649224, 4952, 2290649224, 2290649224, 4952, 2290649224, 2290649224, 4952, 2290649224, 2290649224, 4952, 2290649224, 2290649224, 4952, 2290649224, 2290649224, 4952, 2290649224, 2290649224, 4952, 2290649224, 2290649224, 4952, 2290649224, 2290649224, 4952, 2290649224, 2290649224, 4952, 2290649224, 2290649224, 4952, 2290649224, 2290649224, 4952, 2290649224, 2290649224, 4952, 2290649224, 2290649224, 4952, 2290649224, 2290649224, 4952, 2290649224, 2290649224, 4952, 2290649224, 2290649224, 6144, 0, 134219776, 6144, 0, 134219776, 6160, 0, 134219776, 6160, 0, 134219776, 7248, 19173961, 
0, 7248, 19173961, 0, 7248, 19173961, 0, 7248, 19173961, 0, 7248, 19173961, 0, 7248, 19173961, 0, 7248, 19173961, 0, 7248, 19173961, 0, 7248, 19173961, 0, 7264, 19173961, 0, 7264, 19173961, 0, 7264, 19173961, 0, 7264, 19173961, 0, 7264, 19173961, 0, 7264, 19173961, 0, 7264, 19173961, 0, 7264, 19173961, 0, 7264, 19173961, 0, 7280, 19173961, 0, 7280, 19173961, 0, 7280, 19173961, 0, 7280, 19173961, 0, 7280, 19173961, 0, 7280, 19173961, 0, 7280, 19173961, 0, 7280, 19173961, 0, 7280, 19173961, 0, 8960, 0, 256, 8976, 0, 256, 8992, 0, 256, 9856, 272696336, 68174084, 9856, 272696336, 68174084, 9856, 272696336, 68174084, 9856, 272696336, 68174084, 9856, 272696336, 68174084, 9856, 272696336, 68174084, 9856, 272696336, 68174084, 9856, 272696336, 68174084, 9856, 272696336, 68174084, 9856, 272696336, 68174084, 9872, 272696336, 68174084, 9872, 272696336, 68174084, 9872, 272696336, 68174084, 9872, 272696336, 68174084, 9872, 272696336, 68174084, 9872, 272696336, 68174084, 9872, 272696336, 68174084, 9872, 272696336, 68174084, 9872, 272696336, 68174084, 9872, 272696336, 68174084, 9888, 272696336, 68174084, 9888, 272696336, 68174084, 9888, 272696336, 68174084, 9888, 272696336, 68174084, 9888, 272696336, 68174084, 9888, 272696336, 68174084, 9888, 272696336, 68174084, 9888, 272696336, 68174084, 9888, 272696336, 68174084, 9888, 272696336, 68174084, 12160, 0, 67108864, 12176, 0, 67108864, 12192, 0, 67108864, 13952, 272696336, 68174084, 13952, 272696336, 68174084, 13952, 272696336, 68174084, 13952, 272696336, 68174084, 13952, 272696336, 68174084, 13952, 272696336, 68174084, 13952, 272696336, 68174084, 13952, 272696336, 68174084, 13952, 272696336, 68174084, 13952, 272696336, 68174084, 13968, 272696336, 68174084, 13968, 272696336, 68174084, 13968, 272696336, 68174084, 13968, 272696336, 68174084, 13968, 272696336, 68174084, 13968, 272696336, 68174084, 13968, 272696336, 68174084, 13968, 272696336, 68174084, 13968, 272696336, 68174084, 13968, 272696336, 68174084, 13984, 272696336, 68174084, 
13984, 272696336, 68174084, 13984, 272696336, 68174084, 13984, 272696336, 68174084, 13984, 272696336, 68174084, 13984, 272696336, 68174084, 13984, 272696336, 68174084, 13984, 272696336, 68174084, 13984, 272696336, 68174084, 13984, 272696336, 68174084, 15104, 36, 1224736768, 15104, 36, 1224736768, 15104, 36, 1224736768, 15104, 36, 1224736768, 15104, 36, 1224736768, 16256, 4, 0, 16960, 4, 1224736768, 16960, 4, 1224736768, 16960, 4, 1224736768, 16960, 4, 1224736768, 17856, 85, 0, 17856, 85, 0, 17856, 85, 0, 17856, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435230869814058_164_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435230869814058_164_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..43cb3760 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435230869814058_164_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,141 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 5)) { + if ((WaveGetLaneIndex() >= 52)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 28)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2880, 21, 0, 2880, 21, 0, 2880, 21, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 
+ Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435231518774983_165_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435231518774983_165_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e9a73895 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435231518774983_165_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,505 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 44))) { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 35)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 57))) { + if (((WaveGetLaneIndex() < 12) || 
(WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 49))) { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) 
{ + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 45))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((264 << 6) | (i2 << 4)); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((277 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 33)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((303 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 60))) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (324 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 53))) { + result = (result + 
WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((352 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 42)) { + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((362 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((372 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((381 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((386 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((393 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((397 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 48)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((404 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter4 == 1)) { + break; + } + } + } + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 62))) { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (425 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() < 18)) { + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((444 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((470 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 47))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((488 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((499 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((510 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((517 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (531 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6720, 0, 286331152, 6720, 0, 286331152, 6720, 0, 286331152, 6720, 0, 286331152, 6720, 0, 286331152, 6720, 0, 286331152, 6720, 0, 286331152, 7872, 1, 268435456, 7872, 1, 268435456, 10304, 1, 0, 10320, 1, 0, 12032, 17476, 1140850688, 12032, 17476, 1140850688, 12032, 17476, 1140850688, 12032, 17476, 1140850688, 12032, 17476, 1140850688, 12032, 17476, 1140850688, 12672, 68, 0, 12672, 68, 0, 13952, 4, 1145307136, 13952, 4, 1145307136, 13952, 4, 1145307136, 13952, 4, 1145307136, 13952, 4, 1145307136, 14528, 1145307136, 17476, 14528, 1145307136, 17476, 14528, 1145307136, 17476, 14528, 1145307136, 17476, 14528, 1145307136, 17476, 14528, 1145307136, 17476, 14528, 1145307136, 17476, 14528, 1145307136, 17476, 20736, 7, 4026531840, 20736, 7, 4026531840, 20736, 7, 4026531840, 20736, 7, 4026531840, 20736, 7, 4026531840, 20736, 7, 4026531840, 20736, 7, 4026531840, 27200, 131071, 3221225472, 27200, 131071, 3221225472, 27200, 131071, 3221225472, 27200, 131071, 3221225472, 27200, 131071, 3221225472, 27200, 131071, 3221225472, 27200, 131071, 3221225472, 27200, 131071, 3221225472, 27200, 131071, 3221225472, 27200, 131071, 3221225472, 27200, 131071, 3221225472, 27200, 131071, 3221225472, 27200, 131071, 3221225472, 27200, 131071, 3221225472, 27200, 131071, 3221225472, 27200, 131071, 3221225472, 27200, 131071, 3221225472, 27200, 131071, 3221225472, 27200, 131071, 3221225472, 30100, 256, 0, 30104, 256, 0, 30116, 256, 0, 30120, 256, 0, 31248, 0, 3221225472, 31248, 0, 3221225472, 31264, 0, 3221225472, 31264, 0, 3221225472, 31952, 0, 3221225472, 31952, 0, 3221225472, 31968, 0, 3221225472, 
31968, 0, 3221225472, 32656, 0, 3221225472, 32656, 0, 3221225472, 32672, 0, 3221225472, 32672, 0, 3221225472, 33984, 63, 3221225472, 33984, 63, 3221225472, 33984, 63, 3221225472, 33984, 63, 3221225472, 33984, 63, 3221225472, 33984, 63, 3221225472, 33984, 63, 3221225472, 33984, 63, 3221225472] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435278937389564_166_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435278937389564_166_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..536c4992 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435278937389564_166_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,186 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 58))) { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 53)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 43)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 58))) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveSum(8)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 54)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2496, 69905, 268435456, 2496, 69905, 268435456, 2496, 69905, 268435456, 2496, 69905, 268435456, 2496, 69905, 268435456, 2496, 69905, 268435456, 2512, 69905, 268435456, 2512, 69905, 268435456, 2512, 69905, 268435456, 2512, 69905, 268435456, 2512, 69905, 268435456, 2512, 69905, 268435456, 2528, 69905, 268435456, 2528, 69905, 268435456, 2528, 69905, 268435456, 2528, 69905, 268435456, 2528, 69905, 268435456, 2528, 69905, 268435456, 2944, 0, 16777216, 2960, 0, 16777216, 2976, 0, 16777216, 7808, 1145324612, 1145324612, 7808, 1145324612, 1145324612, 7808, 1145324612, 1145324612, 7808, 1145324612, 1145324612, 7808, 1145324612, 1145324612, 7808, 1145324612, 1145324612, 7808, 1145324612, 1145324612, 7808, 1145324612, 1145324612, 7808, 1145324612, 1145324612, 7808, 1145324612, 1145324612, 7808, 1145324612, 1145324612, 7808, 1145324612, 1145324612, 7808, 1145324612, 1145324612, 7808, 1145324612, 1145324612, 7808, 1145324612, 1145324612, 7808, 1145324612, 1145324612, 11136, 0, 526336, 11136, 0, 526336, 11152, 0, 526336, 11152, 0, 526336, 11168, 0, 526336, 11168, 0, 526336, 12096, 8388608, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - 
Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435303558327364_168_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435303558327364_168_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..23629af8 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435303558327364_168_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,323 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 44)) { + if ((WaveGetLaneIndex() == 36)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 56)) { + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
} + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 62))) { + if (((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 4))) { + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + 
result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 39)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [11840, 1145324612, 1145324612, 11840, 1145324612, 1145324612, 11840, 1145324612, 1145324612, 11840, 1145324612, 1145324612, 11840, 1145324612, 1145324612, 11840, 1145324612, 1145324612, 11840, 1145324612, 1145324612, 11840, 1145324612, 1145324612, 11840, 1145324612, 1145324612, 11840, 1145324612, 1145324612, 11840, 1145324612, 1145324612, 11840, 1145324612, 1145324612, 11840, 1145324612, 1145324612, 11840, 1145324612, 1145324612, 11840, 1145324612, 1145324612, 11840, 1145324612, 1145324612, 12480, 8, 0, 15232, 8390656, 134250504, 15232, 8390656, 134250504, 15232, 8390656, 134250504, 15232, 8390656, 134250504, 15232, 8390656, 134250504] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: 
+ Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435569538838655_172_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435569538838655_172_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f79bbca5 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435569538838655_172_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,104 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() == 3)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 60)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435570581604724_173_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435570581604724_173_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e42c349c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435570581604724_173_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,269 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 47)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 40)) || 
(WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 49)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = 
(result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 19)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 177 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 4096, 0, 1145307136, 4096, 0, 1145307136, 4096, 0, 1145307136, 4096, 0, 1145307136, 4736, 64, 0, 5312, 4195328, 67125252, 5312, 4195328, 67125252, 5312, 4195328, 67125252, 5312, 4195328, 67125252, 5312, 4195328, 67125252, 5632, 67125252, 1074004032, 5632, 67125252, 1074004032, 5632, 67125252, 1074004032, 5632, 67125252, 1074004032, 5632, 67125252, 1074004032, 5632, 67125252, 1074004032, 6528, 8, 0, 7424, 2148008064, 8390656, 7424, 2148008064, 8390656, 7424, 2148008064, 8390656, 7424, 2148008064, 8390656, 7424, 2148008064, 8390656, 8064, 2048, 0, 9152, 73, 0, 9152, 73, 0, 9152, 73, 0, 9728, 272696336, 68174084, 9728, 272696336, 68174084, 9728, 272696336, 68174084, 9728, 272696336, 68174084, 9728, 272696336, 68174084, 9728, 272696336, 68174084, 9728, 272696336, 68174084, 9728, 272696336, 68174084, 9728, 272696336, 68174084, 9728, 272696336, 68174084, 10048, 613566756, 1227133513, 10048, 
613566756, 1227133513, 10048, 613566756, 1227133513, 10048, 613566756, 1227133513, 10048, 613566756, 1227133513, 10048, 613566756, 1227133513, 10048, 613566756, 1227133513, 10048, 613566756, 1227133513, 10048, 613566756, 1227133513, 10048, 613566756, 1227133513, 10048, 613566756, 1227133513, 10048, 613566756, 1227133513, 10048, 613566756, 1227133513, 10048, 613566756, 1227133513, 10048, 613566756, 1227133513, 10048, 613566756, 1227133513, 10048, 613566756, 1227133513, 10048, 613566756, 1227133513, 10048, 613566756, 1227133513, 10048, 613566756, 1227133513, 10048, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435680869699602_175_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435680869699602_175_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0ee7677f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435680869699602_175_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,372 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 58)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 
30)) { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 63)) { + if ((WaveGetLaneIndex() == 33)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i1 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 17)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 47)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((226 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 47)) { + if ((WaveGetLaneIndex() < 22)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 21)) { + result = 
(result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((243 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 63)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 6))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((327 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((342 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (346 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 4160, 0, 134217728, 4176, 0, 134217728, 4192, 0, 134217728, 4736, 545392672, 2130440, 4736, 545392672, 2130440, 4736, 545392672, 2130440, 4736, 545392672, 2130440, 4736, 545392672, 2130440, 4736, 545392672, 2130440, 4736, 545392672, 2130440, 4736, 545392672, 2130440, 4736, 545392672, 2130440, 11328, 268501008, 1048832, 11328, 268501008, 1048832, 11328, 268501008, 1048832, 11328, 268501008, 1048832, 11328, 268501008, 1048832, 11648, 1048832, 16781313, 11648, 1048832, 16781313, 11648, 1048832, 16781313, 11648, 1048832, 16781313, 11648, 1048832, 16781313, 20928, 0, 134217728, 20944, 0, 
134217728, 20960, 0, 134217728] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435705001902050_176_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435705001902050_176_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..334e8750 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435705001902050_176_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,155 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 2: { + if (((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 58))) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 4352, 131072, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435705993503884_177_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435705993503884_177_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b3182e5c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435705993503884_177_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435706635506197_178_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435706635506197_178_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4ae47ac8 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435706635506197_178_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,117 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 32)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((28 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3136, 262144, 268435456, 3136, 262144, 268435456, 3152, 262144, 268435456, 3152, 262144, 268435456, 3168, 262144, 268435456, 3168, 262144, 268435456] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435729858132400_179_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435729858132400_179_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7b83e278 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435729858132400_179_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,238 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 61))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 52))) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 264 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 6400, 65, 0, 6400, 65, 0, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 6976, 1363481681, 340870420, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 
1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765, 7296, 1430607189, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435733551832556_180_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435733551832556_180_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..35b07a28 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435733551832556_180_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,375 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 23)) { + if ((WaveGetLaneIndex() >= 52)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 50))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 47)) { + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } else { + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 52))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 43))) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | 
(counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 62))) { + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (266 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 41)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((280 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 41)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) 
| (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((339 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((358 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 55)) || 
(WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((380 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 56))) { + if (((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (407 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (420 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (424 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (435 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 
1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 81 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [8512, 73, 0, 8512, 73, 0, 8512, 73, 0, 14080, 272696336, 68174084, 14080, 272696336, 68174084, 14080, 272696336, 68174084, 14080, 272696336, 68174084, 14080, 272696336, 68174084, 14080, 272696336, 68174084, 14080, 272696336, 68174084, 14080, 272696336, 68174084, 14080, 272696336, 68174084, 14080, 272696336, 68174084, 20288, 292, 136314880, 20288, 292, 136314880, 20288, 292, 136314880, 20288, 292, 136314880, 20288, 292, 136314880, 27840, 2340, 136613888, 27840, 2340, 136613888, 27840, 2340, 136613888, 27840, 2340, 136613888, 27840, 2340, 136613888, 27840, 2340, 136613888, 27840, 2340, 136613888, 27840, 2340, 136613888, 27840, 2340, 136613888] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756435948922160356_183_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756435948922160356_183_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..76df1dc1 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756435948922160356_183_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,173 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 24))) { + if ((((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 32)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((70 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 27)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((77 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 129 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1360, 2147516416, 16777216, 1360, 2147516416, 16777216, 1360, 2147516416, 16777216, 3536, 536870912, 0, 4496, 553648384, 0, 4496, 553648384, 0, 4496, 553648384, 0, 4500, 553648384, 0, 4500, 553648384, 0, 4500, 553648384, 0, 4944, 16777472, 0, 4944, 16777472, 0, 4948, 16777472, 0, 4948, 16777472, 0, 6160, 33554433, 8388736, 6160, 33554433, 8388736, 6160, 33554433, 8388736, 6160, 33554433, 8388736, 6800, 17, 0, 6800, 17, 0, 7696, 1145324612, 71582788, 7696, 1145324612, 71582788, 7696, 1145324612, 71582788, 7696, 1145324612, 71582788, 7696, 1145324612, 71582788, 7696, 1145324612, 71582788, 7696, 1145324612, 71582788, 7696, 1145324612, 71582788, 7696, 1145324612, 71582788, 7696, 1145324612, 71582788, 7696, 1145324612, 71582788, 7696, 1145324612, 71582788, 7696, 1145324612, 71582788, 7696, 1145324612, 71582788, 7696, 1145324612, 
71582788, 8144, 559240, 0, 8144, 559240, 0, 8144, 559240, 0, 8144, 559240, 0, 8144, 559240, 0, 9104, 33554464, 1048576, 9104, 33554464, 1048576, 9104, 33554464, 1048576] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756436294508318356_185_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756436294508318356_185_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6bfd56e0 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756436294508318356_185_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,230 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 41))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 40)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 32)) { + if ((WaveGetLaneIndex() >= 59)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 31)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 41))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 26)) || 
(WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 105 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2624, 16777216, 67112960, 2624, 16777216, 67112960, 2624, 16777216, 67112960, 2628, 16777216, 67112960, 2628, 16777216, 67112960, 2628, 16777216, 67112960, 2640, 16777216, 67112960, 2640, 16777216, 67112960, 2640, 16777216, 67112960, 2644, 16777216, 67112960, 2644, 16777216, 67112960, 2644, 16777216, 67112960, 8080, 0, 256, 8096, 0, 256, 10176, 1145324612, 0, 10176, 1145324612, 0, 10176, 1145324612, 0, 10176, 1145324612, 0, 10176, 1145324612, 0, 10176, 1145324612, 0, 10176, 1145324612, 0, 10176, 1145324612, 0, 10752, 1145324612, 0, 10752, 1145324612, 0, 10752, 1145324612, 0, 10752, 1145324612, 0, 10752, 1145324612, 0, 10752, 1145324612, 0, 10752, 1145324612, 0, 
10752, 1145324612, 0, 14976, 8390656, 134250504, 14976, 8390656, 134250504, 14976, 8390656, 134250504, 14976, 8390656, 134250504, 14976, 8390656, 134250504] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756436313192024975_186_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756436313192024975_186_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5e40981b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756436313192024975_186_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,314 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 12)) { + if ((WaveGetLaneIndex() >= 36)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 29)) { + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 53)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 53))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 36)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((259 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + if ((i5 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((280 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 59)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 252 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 136347648, 2181570690, 1856, 136347648, 2181570690, 1856, 136347648, 2181570690, 1856, 136347648, 
2181570690, 1856, 136347648, 2181570690, 1856, 136347648, 2181570690, 1856, 136347648, 2181570690, 1856, 136347648, 2181570690, 1856, 136347648, 2181570690, 1872, 136347648, 2181570690, 1872, 136347648, 2181570690, 1872, 136347648, 2181570690, 1872, 136347648, 2181570690, 1872, 136347648, 2181570690, 1872, 136347648, 2181570690, 1872, 136347648, 2181570690, 1872, 136347648, 2181570690, 1872, 136347648, 2181570690, 2432, 1090785280, 272696336, 2432, 1090785280, 272696336, 2432, 1090785280, 272696336, 2432, 1090785280, 272696336, 2432, 1090785280, 272696336, 2432, 1090785280, 272696336, 2432, 1090785280, 272696336, 2432, 1090785280, 272696336, 2432, 1090785280, 272696336, 2448, 1090785280, 272696336, 2448, 1090785280, 272696336, 2448, 1090785280, 272696336, 2448, 1090785280, 272696336, 2448, 1090785280, 272696336, 2448, 1090785280, 272696336, 2448, 1090785280, 272696336, 2448, 1090785280, 272696336, 2448, 1090785280, 272696336, 3328, 272696336, 68174084, 3328, 272696336, 68174084, 3328, 272696336, 68174084, 3328, 272696336, 68174084, 3328, 272696336, 68174084, 3328, 272696336, 68174084, 3328, 272696336, 68174084, 3328, 272696336, 68174084, 3328, 272696336, 68174084, 3328, 272696336, 68174084, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 13056, 68174084, 1090785345, 13056, 68174084, 1090785345, 13056, 68174084, 1090785345, 13056, 68174084, 1090785345, 13056, 68174084, 1090785345, 13056, 68174084, 1090785345, 13056, 
68174084, 1090785345, 13056, 68174084, 1090785345, 13056, 68174084, 1090785345, 13056, 68174084, 1090785345, 13056, 68174084, 1090785345, 16576, 0, 16, 16580, 0, 16, 16584, 0, 16, 16592, 0, 16, 16596, 0, 16, 16600, 0, 16] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756436377504111133_189_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756436377504111133_189_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..089d576a --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756436377504111133_189_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,198 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 53))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 48))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch 
((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 10)) { + if ((WaveGetLaneIndex() >= 58)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 58))) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + 
} + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 207 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1536, 7, 4292870144, 1536, 7, 4292870144, 1536, 7, 4292870144, 1536, 7, 4292870144, 1536, 7, 4292870144, 1536, 7, 4292870144, 1536, 7, 4292870144, 1536, 7, 4292870144, 1536, 7, 4292870144, 1536, 7, 4292870144, 1536, 7, 4292870144, 1536, 7, 4292870144, 1536, 7, 4292870144, 1536, 7, 4292870144, 2176, 5, 0, 2176, 5, 0, 3456, 7, 4261412864, 3456, 7, 4261412864, 3456, 7, 4261412864, 3456, 7, 4261412864, 3456, 7, 4261412864, 3456, 7, 4261412864, 3456, 7, 4261412864, 3456, 7, 4261412864, 3456, 7, 4261412864, 3456, 7, 4261412864, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 4032, 2863311488, 699050, 6080, 0, 16, 6976, 286331136, 1118481, 6976, 286331136, 1118481, 6976, 286331136, 1118481, 6976, 286331136, 1118481, 6976, 286331136, 1118481, 6976, 286331136, 1118481, 6976, 286331136, 1118481, 6976, 286331136, 1118481, 6976, 286331136, 1118481, 6976, 286331136, 1118481, 6976, 286331136, 1118481, 6976, 286331136, 1118481, 9280, 838784, 0, 9280, 838784, 0, 9280, 838784, 0, 9280, 838784, 0, 9280, 838784, 0, 9280, 838784, 0, 9280, 838784, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756436378835393692_190_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756436378835393692_190_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..90446581 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756436378835393692_190_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,201 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 50)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 38)) || 
(WaveGetLaneIndex() == 9))) { + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 60))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 309 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 2368, 2863311530, 2863311530, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 
1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 3008, 85, 0, 3008, 85, 0, 3008, 85, 0, 3008, 85, 0, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 3584, 1431655765, 1431655765, 8832, 64, 0, 8848, 64, 0, 8864, 64, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756436380745952240_191_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756436380745952240_191_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cc419aa5 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756436380745952240_191_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,117 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + if ((WaveGetLaneIndex() >= 59)) { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3584, 128, 16777222, 3584, 128, 16777222, 3584, 128, 16777222, 3584, 128, 16777222, 3328, 32768, 128, 3328, 32768, 128, 3072, 3967, 0, 3072, 3967, 0, 3072, 3967, 0, 3072, 3967, 0, 3072, 3967, 0, 3072, 3967, 0, 3072, 3967, 0, 3072, 3967, 0, 3072, 3967, 0, 3072, 3967, 0, 3072, 3967, 0, 2816, 0, 1, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888, 2560, 0, 4278189888] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756436396753548443_193_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756436396753548443_193_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cac51aea --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756436396753548443_193_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result 
+ WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756436396991759260_194_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756436396991759260_194_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..06e1133f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756436396991759260_194_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,326 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 19)) { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 55))) { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 61))) { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + 
counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 55))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 51))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 
58))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((248 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 333 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [12416, 559240, 0, 12416, 559240, 0, 12416, 559240, 0, 12416, 559240, 0, 12416, 559240, 0, 13056, 73, 0, 13056, 73, 0, 13056, 73, 0, 13632, 272696336, 68174084, 13632, 272696336, 68174084, 13632, 272696336, 68174084, 13632, 272696336, 68174084, 13632, 272696336, 68174084, 13632, 272696336, 68174084, 13632, 272696336, 68174084, 13632, 272696336, 68174084, 13632, 272696336, 68174084, 13632, 272696336, 68174084, 14784, 36, 1207959552, 14784, 36, 1207959552, 14784, 36, 1207959552, 14784, 36, 1207959552, 15872, 260, 1090519040, 15872, 260, 1090519040, 15872, 260, 1090519040, 15872, 260, 1090519040, 15888, 260, 1090519040, 15888, 260, 1090519040, 15888, 260, 1090519040, 15888, 260, 
1090519040, 15904, 260, 1090519040, 15904, 260, 1090519040, 15904, 260, 1090519040, 15904, 260, 1090519040, 16448, 260, 1090519040, 16448, 260, 1090519040, 16448, 260, 1090519040, 16448, 260, 1090519040, 16464, 260, 1090519040, 16464, 260, 1090519040, 16464, 260, 1090519040, 16464, 260, 1090519040, 16480, 260, 1090519040, 16480, 260, 1090519040, 16480, 260, 1090519040, 16480, 260, 1090519040, 6912, 546, 0, 6912, 546, 0, 6912, 546, 0, 7616, 8192, 0, 9792, 0, 2228224, 9792, 0, 2228224, 11536, 4, 1140850688, 11536, 4, 1140850688, 11536, 4, 1140850688, 11552, 4, 1140850688, 11552, 4, 1140850688, 11552, 4, 1140850688, 576, 17, 0, 576, 17, 0, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 3264, 273, 0, 3264, 273, 0, 3264, 273, 0, 3280, 273, 0, 3280, 273, 0, 3280, 273, 0, 3296, 273, 0, 3296, 273, 0, 3296, 273, 0, 3968, 0, 286261248, 3968, 0, 286261248, 3968, 0, 286261248, 3984, 0, 286261248, 3984, 0, 286261248, 3984, 0, 286261248, 4000, 0, 286261248, 4000, 0, 286261248, 4000, 0, 286261248, 4672, 0, 286326784, 4672, 0, 286326784, 4672, 0, 286326784, 4672, 0, 286326784, 4688, 0, 286326784, 4688, 0, 286326784, 4688, 0, 286326784, 4688, 0, 286326784, 4704, 0, 286326784, 4704, 0, 286326784, 4704, 0, 286326784, 4704, 0, 286326784] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - 
Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756436421389604951_195_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756436421389604951_195_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..47fb99b2 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756436421389604951_195_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,120 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 57))) { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || 
(WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 42)) { + if ((WaveGetLaneIndex() < 18)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 43)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((82 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1408, 4, 33554436, 1408, 4, 33554436, 1408, 4, 33554436, 1424, 4, 33554436, 1424, 4, 33554436, 1424, 4, 33554436, 3648, 15, 4261347328, 3648, 15, 4261347328, 3648, 15, 4261347328, 3648, 15, 4261347328, 3648, 15, 4261347328, 3648, 15, 4261347328, 3648, 15, 4261347328, 3648, 15, 4261347328, 3648, 15, 4261347328, 3648, 15, 4261347328, 3648, 15, 4261347328, 3648, 15, 4261347328, 3648, 15, 4261347328, 3648, 15, 4261347328, 3648, 15, 4261347328, 3648, 15, 4261347328, 3648, 15, 4261347328, 3648, 15, 
4261347328, 3648, 15, 4261347328, 3664, 15, 4261347328, 3664, 15, 4261347328, 3664, 15, 4261347328, 3664, 15, 4261347328, 3664, 15, 4261347328, 3664, 15, 4261347328, 3664, 15, 4261347328, 3664, 15, 4261347328, 3664, 15, 4261347328, 3664, 15, 4261347328, 3664, 15, 4261347328, 3664, 15, 4261347328, 3664, 15, 4261347328, 3664, 15, 4261347328, 3664, 15, 4261347328, 3664, 15, 4261347328, 3664, 15, 4261347328, 3664, 15, 4261347328, 3664, 15, 4261347328, 5248, 0, 2048, 5252, 0, 2048, 5264, 0, 2048, 5268, 0, 2048] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756436428848933632_196_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756436428848933632_196_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0c9bafff --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756436428848933632_196_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,156 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 42)) { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 51))) { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + } else { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 9))) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 18)) { + if ((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 51)) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 75 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208, 10752, 0, 4294966208] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756436429379446618_197_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756436429379446618_197_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..146cac9c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756436429379446618_197_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,193 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 35)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 37)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((WaveGetLaneIndex() >= 52)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() 
== 60)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 348 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 
1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3712, 1431655765, 1431655765, 3456, 0, 8, 3200, 0, 32, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 
2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 2816, 2863311530, 2863311490, 7184, 0, 268435456, 7200, 0, 268435456, 8656, 0, 1073741824, 8672, 0, 1073741824, 9920, 0, 4293918720, 9920, 0, 4293918720, 9920, 0, 4293918720, 9920, 0, 4293918720, 9920, 0, 4293918720, 9920, 0, 4293918720, 9920, 0, 4293918720, 9920, 0, 4293918720, 9920, 0, 4293918720, 9920, 0, 4293918720, 9920, 0, 4293918720, 9920, 0, 4293918720] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756436430643498779_198_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756436430643498779_198_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cac51aea --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756436430643498779_198_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); +/* Participation tracker: at each wave-op site every active lane reserves three consecutive uints of _participant_bit via InterlockedAdd on _wave_op_index[0], then stores the site tag (site id << 6) followed by ballot.x and ballot.y of WaveActiveBallot(1). Each active lane appends its own record at whatever offset the atomic returns, so a site yields one identical 3-uint group per participating lane and the group order is not fixed by the shader. */ +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { /* inside case 1 the lane index is odd, so the (lane % 2) == 0 test below never passes and site 18 writes no record */ + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756436484924223543_201_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756436484924223543_201_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d6ba983d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756436484924223543_201_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,281 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); +/* Participation tracker: at each wave-op site every active lane reserves three consecutive uints of _participant_bit via InterlockedAdd on _wave_op_index[0], then stores the site tag (site id << 6, OR'ed with the enclosing loop counters shifted left by 4 and by 2 where present) followed by ballot.x and ballot.y of WaveActiveBallot(1). Each active lane appends its own record at whatever offset the atomic returns, so a site yields one identical 3-uint group per participating lane and the group order is not fixed by the shader. */ +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 61)) || 
(WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + } /* no break after case 0: control continues into case 1, so lanes with lane % 4 == 0 also reach site 74 */ + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 7)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 61)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 42)) { + if ((WaveGetLaneIndex() >= 62)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i3 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() < 21)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 10))) { + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((261 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (295 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 666 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1168, 286331153, 286331153, 1168, 286331153, 286331153, 1168, 286331153, 286331153, 1168, 286331153, 286331153, 1168, 286331153, 286331153, 1168, 286331153, 286331153, 1168, 286331153, 286331153, 1168, 286331153, 286331153, 1168, 286331153, 286331153, 1168, 286331153, 286331153, 1168, 286331153, 286331153, 1168, 286331153, 286331153, 1168, 286331153, 286331153, 1168, 286331153, 286331153, 1168, 286331153, 286331153, 1168, 286331153, 286331153, 1184, 286331153, 286331153, 1184, 286331153, 286331153, 1184, 286331153, 286331153, 1184, 286331153, 286331153, 1184, 286331153, 286331153, 1184, 286331153, 286331153, 1184, 286331153, 286331153, 1184, 
286331153, 286331153, 1184, 286331153, 286331153, 1184, 286331153, 286331153, 1184, 286331153, 286331153, 1184, 286331153, 286331153, 1184, 286331153, 286331153, 1184, 286331153, 286331153, 1184, 286331153, 286331153, 1184, 286331153, 286331153, 1200, 286331153, 286331153, 1200, 286331153, 286331153, 1200, 286331153, 286331153, 1200, 286331153, 286331153, 1200, 286331153, 286331153, 1200, 286331153, 286331153, 1200, 286331153, 286331153, 1200, 286331153, 286331153, 1200, 286331153, 286331153, 1200, 286331153, 286331153, 1200, 286331153, 286331153, 1200, 286331153, 286331153, 1200, 286331153, 286331153, 1200, 286331153, 286331153, 1200, 286331153, 286331153, 1200, 286331153, 286331153, 3024, 257, 0, 3024, 257, 0, 3028, 257, 0, 3028, 257, 0, 3032, 257, 0, 3032, 257, 0, 3040, 257, 0, 3040, 257, 0, 3044, 257, 0, 3044, 257, 0, 3048, 257, 0, 3048, 257, 0, 3056, 257, 0, 3056, 257, 0, 3060, 257, 0, 3060, 257, 0, 3064, 257, 0, 3064, 257, 0, 4736, 286331153, 286331153, 4736, 286331153, 286331153, 4736, 286331153, 286331153, 4736, 286331153, 286331153, 4736, 286331153, 286331153, 4736, 286331153, 286331153, 4736, 286331153, 286331153, 4736, 286331153, 286331153, 4736, 286331153, 286331153, 4736, 286331153, 286331153, 4736, 286331153, 286331153, 4736, 286331153, 286331153, 4736, 286331153, 286331153, 4736, 286331153, 286331153, 4736, 286331153, 286331153, 4736, 286331153, 286331153, 5056, 1145324612, 1145324612, 5056, 1145324612, 1145324612, 5056, 1145324612, 1145324612, 5056, 1145324612, 1145324612, 5056, 1145324612, 1145324612, 5056, 1145324612, 1145324612, 5056, 1145324612, 1145324612, 5056, 1145324612, 1145324612, 5056, 1145324612, 1145324612, 5056, 1145324612, 1145324612, 5056, 1145324612, 1145324612, 5056, 1145324612, 1145324612, 5056, 1145324612, 1145324612, 5056, 1145324612, 1145324612, 5056, 1145324612, 1145324612, 5056, 1145324612, 1145324612, 6848, 85, 0, 6848, 85, 0, 6848, 85, 0, 6848, 85, 0, 7488, 0, 2147483648, 9024, 0, 2863310848, 9024, 0, 2863310848, 9024, 0, 
2863310848, 9024, 0, 2863310848, 9024, 0, 2863310848, 9024, 0, 2863310848, 9024, 0, 2863310848, 9024, 0, 2863310848, 9024, 0, 2863310848, 9024, 0, 2863310848, 9024, 0, 2863310848, 9040, 0, 2863310848, 9040, 0, 2863310848, 9040, 0, 2863310848, 9040, 0, 2863310848, 9040, 0, 2863310848, 9040, 0, 2863310848, 9040, 0, 2863310848, 9040, 0, 2863310848, 9040, 0, 2863310848, 9040, 0, 2863310848, 9040, 0, 2863310848, 9056, 0, 2863310848, 9056, 0, 2863310848, 9056, 0, 2863310848, 9056, 0, 2863310848, 9056, 0, 2863310848, 9056, 0, 2863310848, 9056, 0, 2863310848, 9056, 0, 2863310848, 9056, 0, 2863310848, 9056, 0, 2863310848, 9056, 0, 2863310848, 9600, 0, 2863310848, 9600, 0, 2863310848, 9600, 0, 2863310848, 9600, 0, 2863310848, 9600, 0, 2863310848, 9600, 0, 2863310848, 9600, 0, 2863310848, 9600, 0, 2863310848, 9600, 0, 2863310848, 9600, 0, 2863310848, 9600, 0, 2863310848, 9616, 0, 2863310848, 9616, 0, 2863310848, 9616, 0, 2863310848, 9616, 0, 2863310848, 9616, 0, 2863310848, 9616, 0, 2863310848, 9616, 0, 2863310848, 9616, 0, 2863310848, 9616, 0, 2863310848, 9616, 0, 2863310848, 9616, 0, 2863310848, 9632, 0, 2863310848, 9632, 0, 2863310848, 9632, 0, 2863310848, 9632, 0, 2863310848, 9632, 0, 2863310848, 9632, 0, 2863310848, 9632, 0, 2863310848, 9632, 0, 2863310848, 9632, 0, 2863310848, 9632, 0, 2863310848, 9632, 0, 2863310848, 11008, 0, 2863300608, 11008, 0, 2863300608, 11008, 0, 2863300608, 11008, 0, 2863300608, 11008, 0, 2863300608, 11008, 0, 2863300608, 11008, 0, 2863300608, 11008, 0, 2863300608, 11008, 0, 2863300608, 11024, 0, 2863300608, 11024, 0, 2863300608, 11024, 0, 2863300608, 11024, 0, 2863300608, 11024, 0, 2863300608, 11024, 0, 2863300608, 11024, 0, 2863300608, 11024, 0, 2863300608, 11024, 0, 2863300608, 11040, 0, 2863300608, 11040, 0, 2863300608, 11040, 0, 2863300608, 11040, 0, 2863300608, 11040, 0, 2863300608, 11040, 0, 2863300608, 11040, 0, 2863300608, 11040, 0, 2863300608, 11040, 0, 2863300608, 14080, 1024, 8388608, 14080, 1024, 8388608, 16720, 1024, 8388608, 
16720, 1024, 8388608, 16724, 1024, 8388608, 16724, 1024, 8388608, 16728, 1024, 8388608, 16728, 1024, 8388608, 16736, 1024, 8388608, 16736, 1024, 8388608, 16740, 1024, 8388608, 16740, 1024, 8388608, 16744, 1024, 8388608, 16744, 1024, 8388608, 16752, 1024, 8388608, 16752, 1024, 8388608, 16756, 1024, 8388608, 16756, 1024, 8388608, 16760, 1024, 8388608, 16760, 1024, 8388608, 17424, 1024, 8388608, 17424, 1024, 8388608, 17440, 1024, 8388608, 17440, 1024, 8388608, 17456, 1024, 8388608, 17456, 1024, 8388608] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756436523911433546_202_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756436523911433546_202_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..af495ff6 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756436523911433546_202_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,464 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); +RWStructuredBuffer<uint> _wave_op_index : register(u1); +/* Participation tracker: at each wave-op site every active lane reserves three consecutive uints of _participant_bit via InterlockedAdd on _wave_op_index[0], then stores the site tag (site id << 6, OR'ed with the enclosing loop counters shifted left by 4 and by 2 where present) followed by ballot.x and ballot.y of WaveActiveBallot(1). Each active lane appends its own record at whatever offset the atomic returns, so a site yields one identical 3-uint group per participating lane and the group order is not fixed by the shader. */ +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 42))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 61)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((82 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 40)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((89 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 39)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter3 == 1)) { + break; + } + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } 
+ break; + } + case 2: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 45))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 4))) { + 
result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((247 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 18)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= ((304 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((336 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((347 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (354 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (358 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (365 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (384 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (409 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 264 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 0, 1296, 1, 0, 2560, 1, 268435456, 2560, 1, 268435456, 2576, 1, 268435456, 2576, 1, 268435456, 3264, 1, 268500992, 3264, 1, 268500992, 3264, 1, 268500992, 3280, 1, 268500992, 3280, 1, 268500992, 3280, 1, 268500992, 5712, 0, 1048832, 5712, 0, 1048832, 5716, 0, 1048832, 5716, 0, 1048832, 5720, 0, 1048832, 5720, 0, 1048832, 5728, 0, 1048832, 5728, 0, 1048832, 5732, 0, 1048832, 5732, 0, 1048832, 5736, 0, 1048832, 5736, 0, 1048832, 5744, 0, 1048832, 5744, 0, 1048832, 5748, 0, 1048832, 
5748, 0, 1048832, 5752, 0, 1048832, 5752, 0, 1048832, 10944, 537002016, 2097664, 10944, 537002016, 2097664, 10944, 537002016, 2097664, 10944, 537002016, 2097664, 10944, 537002016, 2097664, 12608, 0, 67125248, 12608, 0, 67125248, 12928, 0, 1074003968, 12928, 0, 1074003968, 13888, 0, 1140850688, 13888, 0, 1140850688, 15824, 67108864, 0, 15840, 67108864, 0, 16256, 1024, 0, 16704, 559240, 0, 16704, 559240, 0, 16704, 559240, 0, 16704, 559240, 0, 16704, 559240, 0, 17600, 15, 0, 17600, 15, 0, 17600, 15, 0, 17600, 15, 0, 19472, 65536, 0, 21520, 4, 0, 21536, 4, 0, 21552, 4, 0, 22224, 4, 0, 22240, 4, 0, 22256, 4, 0, 22656, 34952, 0, 22656, 34952, 0, 22656, 34952, 0, 22656, 34952, 0, 23360, 262143, 0, 23360, 262143, 0, 23360, 262143, 0, 23360, 262143, 0, 23360, 262143, 0, 23360, 262143, 0, 23360, 262143, 0, 23360, 262143, 0, 23360, 262143, 0, 23360, 262143, 0, 23360, 262143, 0, 23360, 262143, 0, 23360, 262143, 0, 23360, 262143, 0, 23360, 262143, 0, 23360, 262143, 0, 23360, 262143, 0, 23360, 262143, 0, 24576, 33554432, 1073741856, 24576, 33554432, 1073741856, 24576, 33554432, 1073741856, 26176, 33554432, 131200, 26176, 33554432, 131200, 26176, 33554432, 131200] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756436570149208520_203_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756436570149208520_203_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3271892d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756436570149208520_203_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,237 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 30))) { + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((128 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((135 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3136, 4194560, 73744, 3136, 4194560, 73744, 3136, 4194560, 73744, 3136, 4194560, 73744, 3136, 4194560, 73744, 2880, 2, 0, 2496, 29, 3758096384, 2496, 29, 3758096384, 2496, 29, 3758096384, 2496, 29, 3758096384, 2496, 29, 3758096384, 2496, 29, 3758096384, 2496, 29, 3758096384, 2240, 0, 4194304, 8640, 0, 67108864, 8644, 0, 67108864, 8656, 0, 67108864, 8660, 0, 67108864, 14016, 545392672, 136348168, 14016, 545392672, 136348168, 14016, 545392672, 136348168, 14016, 545392672, 136348168, 14016, 545392672, 136348168, 14016, 545392672, 136348168, 14016, 545392672, 136348168, 14016, 545392672, 136348168, 14016, 545392672, 136348168, 14016, 545392672, 136348168] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756436596976546445_205_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756436596976546445_205_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6b015b16 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756436596976546445_205_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,138 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 23)) { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 25)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 300 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 2097151, 2147483648, 1344, 2097151, 
2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1344, 2097151, 2147483648, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 1088, 1430257664, 1431655765, 2640, 73, 0, 2640, 73, 0, 2640, 73, 0, 2656, 73, 0, 2656, 73, 0, 2656, 73, 0, 3216, 2396745, 0, 3216, 2396745, 0, 3216, 2396745, 0, 3216, 2396745, 0, 3216, 2396745, 0, 3216, 2396745, 0, 3216, 2396745, 0, 3216, 2396745, 0, 3232, 2396745, 0, 3232, 2396745, 0, 3232, 2396745, 0, 3232, 2396745, 0, 3232, 2396745, 0, 3232, 2396745, 0, 3232, 2396745, 0, 3232, 2396745, 0, 4688, 134217728, 65536, 4688, 134217728, 65536, 4704, 134217728, 65536, 4704, 134217728, 65536, 5248, 272696336, 68174084, 5248, 272696336, 68174084, 5248, 272696336, 68174084, 5248, 272696336, 68174084, 5248, 272696336, 68174084, 5248, 272696336, 68174084, 5248, 272696336, 68174084, 5248, 272696336, 68174084, 5248, 272696336, 68174084, 5248, 272696336, 68174084, 5568, 613566756, 1227133513, 5568, 613566756, 1227133513, 5568, 613566756, 
1227133513, 5568, 613566756, 1227133513, 5568, 613566756, 1227133513, 5568, 613566756, 1227133513, 5568, 613566756, 1227133513, 5568, 613566756, 1227133513, 5568, 613566756, 1227133513, 5568, 613566756, 1227133513, 5568, 613566756, 1227133513, 5568, 613566756, 1227133513, 5568, 613566756, 1227133513, 5568, 613566756, 1227133513, 5568, 613566756, 1227133513, 5568, 613566756, 1227133513, 5568, 613566756, 1227133513, 5568, 613566756, 1227133513, 5568, 613566756, 1227133513, 5568, 613566756, 1227133513, 5568, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756436598404842807_206_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756436598404842807_206_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..44468312 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756436598404842807_206_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,269 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 48))) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) 
{ + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 39)) 
{ + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((124 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((131 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 264 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 2816, 17476, 1145307136, 2816, 17476, 1145307136, 2816, 17476, 1145307136, 2816, 17476, 1145307136, 2816, 17476, 1145307136, 2816, 17476, 1145307136, 2816, 17476, 1145307136, 2816, 17476, 1145307136, 2832, 17476, 1145307136, 2832, 17476, 1145307136, 
2832, 17476, 1145307136, 2832, 17476, 1145307136, 2832, 17476, 1145307136, 2832, 17476, 1145307136, 2832, 17476, 1145307136, 2832, 17476, 1145307136, 2848, 17476, 1145307136, 2848, 17476, 1145307136, 2848, 17476, 1145307136, 2848, 17476, 1145307136, 2848, 17476, 1145307136, 2848, 17476, 1145307136, 2848, 17476, 1145307136, 2848, 17476, 1145307136, 3264, 838860, 0, 3264, 838860, 0, 3264, 838860, 0, 3264, 838860, 0, 3264, 838860, 0, 3264, 838860, 0, 3264, 838860, 0, 3264, 838860, 0, 3264, 838860, 0, 3264, 838860, 0, 3904, 85, 0, 3904, 85, 0, 3904, 85, 0, 3904, 85, 0, 5120, 85, 0, 5120, 85, 0, 5120, 85, 0, 5120, 85, 0, 7040, 512, 524290, 7040, 512, 524290, 7040, 512, 524290, 7056, 512, 524290, 7056, 512, 524290, 7056, 512, 524290, 7072, 512, 524290, 7072, 512, 524290, 7072, 512, 524290, 7940, 0, 128, 7944, 0, 128, 7956, 0, 128, 7960, 0, 128, 7972, 0, 128, 7976, 0, 128, 9536, 134217728, 0, 9552, 134217728, 0, 9568, 134217728, 0, 12608, 545392672, 136348168, 12608, 545392672, 136348168, 12608, 545392672, 136348168, 12608, 545392672, 136348168, 12608, 545392672, 136348168, 12608, 545392672, 136348168, 12608, 545392672, 136348168, 12608, 545392672, 136348168, 12608, 545392672, 136348168, 12608, 545392672, 136348168] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756436653154481542_208_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756436653154481542_208_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1c24f946 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756436653154481542_208_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,180 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 45)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((93 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 40))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((104 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + break; + } + case 2: { + if 
(true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 5972, 512, 131072, 5972, 512, 131072, 5976, 512, 131072, 5976, 512, 131072, 5980, 512, 131072, 5980, 512, 131072, 5988, 512, 131072, 5988, 512, 131072, 5992, 512, 131072, 5992, 512, 131072, 5996, 512, 131072, 5996, 512, 131072, 6676, 8192, 0, 6680, 8192, 0, 6684, 8192, 0, 6692, 8192, 0, 6696, 8192, 0, 6700, 8192, 0, 7888, 33554944, 536870912, 7888, 33554944, 536870912, 7888, 33554944, 536870912, 7904, 33554944, 536870912, 7904, 33554944, 536870912, 7904, 33554944, 536870912, 8384, 1145324612, 1145324612, 8384, 1145324612, 1145324612, 8384, 1145324612, 1145324612, 8384, 1145324612, 1145324612, 8384, 1145324612, 1145324612, 8384, 1145324612, 1145324612, 8384, 1145324612, 1145324612, 8384, 1145324612, 1145324612, 8384, 1145324612, 1145324612, 8384, 1145324612, 1145324612, 8384, 1145324612, 1145324612, 8384, 1145324612, 1145324612, 8384, 1145324612, 1145324612, 8384, 1145324612, 1145324612, 8384, 1145324612, 1145324612, 8384, 1145324612, 1145324612, 8832, 559240, 0, 8832, 559240, 0, 8832, 559240, 0, 8832, 559240, 0, 8832, 559240, 0] + - Name: _wave_op_index + 
Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437123758112509_211_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437123758112509_211_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9d97de8f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437123758112509_211_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,214 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 59)) { + 
result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + if 
((WaveGetLaneIndex() == 39)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 59)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 53)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 37)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + 
} + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 129 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5696, 0, 4160749568, 5696, 0, 4160749568, 5696, 0, 4160749568, 5696, 0, 4160749568, 5696, 0, 4160749568, 5312, 16384, 256, 5312, 16384, 256, 4928, 1073741832, 0, 4928, 1073741832, 0, 4544, 2147483648, 0, 4160, 268435456, 524288, 4160, 268435456, 524288, 7616, 0, 268435456, 7632, 0, 268435456, 8064, 0, 285212672, 8064, 0, 285212672, 8080, 0, 285212672, 8080, 0, 285212672, 9408, 1145324612, 1145324612, 9408, 1145324612, 1145324612, 9408, 1145324612, 1145324612, 9408, 1145324612, 1145324612, 9408, 1145324612, 1145324612, 9408, 1145324612, 1145324612, 9408, 1145324612, 1145324612, 9408, 1145324612, 1145324612, 9408, 1145324612, 1145324612, 9408, 1145324612, 1145324612, 9408, 1145324612, 1145324612, 9408, 1145324612, 1145324612, 9408, 1145324612, 1145324612, 9408, 1145324612, 1145324612, 9408, 1145324612, 1145324612, 9408, 1145324612, 1145324612, 9856, 559240, 0, 9856, 559240, 0, 9856, 559240, 0, 9856, 559240, 0, 9856, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - 
Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437220483754850_215_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437220483754850_215_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..53816a37 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437220483754850_215_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,189 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 32)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + } + } + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 35)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((131 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((147 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((156 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 288 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2624, 524320, 8192, 2624, 524320, 8192, 2624, 524320, 8192, 2368, 0, 1, 2112, 16351, 4278190080, 2112, 
16351, 4278190080, 2112, 16351, 4278190080, 2112, 16351, 4278190080, 2112, 16351, 4278190080, 2112, 16351, 4278190080, 2112, 16351, 4278190080, 2112, 16351, 4278190080, 2112, 16351, 4278190080, 2112, 16351, 4278190080, 2112, 16351, 4278190080, 2112, 16351, 4278190080, 2112, 16351, 4278190080, 2112, 16351, 4278190080, 2112, 16351, 4278190080, 2112, 16351, 4278190080, 2112, 16351, 4278190080, 2112, 16351, 4278190080, 2112, 16351, 4278190080, 2112, 16351, 4278190080, 2112, 16351, 4278190080, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 1856, 1431650304, 5592404, 6528, 2147483652, 33554432, 6528, 2147483652, 33554432, 6528, 2147483652, 33554432, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 6272, 715827882, 2829757098, 5888, 0, 
1431655424, 5888, 0, 1431655424, 5888, 0, 1431655424, 5888, 0, 1431655424, 5888, 0, 1431655424, 5888, 0, 1431655424, 5888, 0, 1431655424, 5888, 0, 1431655424, 5888, 0, 1431655424, 5888, 0, 1431655424, 5888, 0, 1431655424, 5504, 1024, 0, 7488, 0, 4194304, 7504, 0, 4194304, 8388, 0, 8, 8392, 0, 8, 8404, 0, 8, 8408, 0, 8] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437289198401346_217_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437289198401346_217_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b2f878e1 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437289198401346_217_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,142 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 32)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 63 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5952, 0, 1, 5568, 2359298, 2147483712, 5568, 2359298, 2147483712, 5568, 2359298, 2147483712, 5568, 2359298, 2147483712, 5568, 2359298, 2147483712, 5312, 1024, 0, 4928, 268435520, 536870944, 4928, 268435520, 536870944, 4928, 268435520, 536870944, 4928, 268435520, 536870944, 4672, 189, 0, 4672, 189, 0, 4672, 189, 0, 4672, 189, 0, 4672, 189, 0, 4672, 189, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437290232879718_218_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437290232879718_218_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1cf58d29 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437290232879718_218_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,125 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 17)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((WaveGetLaneIndex() == 45)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 
2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2880, 1431655765, 1431655765, 2496, 2048, 512, 2496, 2048, 512, 1856, 41642, 0, 1856, 41642, 0, 1856, 41642, 0, 1856, 41642, 0, 1856, 41642, 0, 1856, 41642, 0, 1856, 41642, 0, 4736, 0, 8192, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437291088171134_219_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437291088171134_219_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fd4bbf0e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437291088171134_219_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 8192, 1073741832, 2112, 8192, 1073741832, 2112, 8192, 1073741832, 1728, 384, 256, 
1728, 384, 256, 1728, 384, 256] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437291356944708_220_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437291356944708_220_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1eb23ff5 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437291356944708_220_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,270 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 57))) { + if (((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 16)) 
{ + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + 
result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 99 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 67108864, 1073741824, 1152, 67108864, 1073741824, 1168, 67108864, 1073741824, 1168, 67108864, 1073741824, 5120, 1, 0, 5952, 16, 0, 8000, 16, 0, 8320, 1048832, 16781313, 8320, 1048832, 16781313, 8320, 1048832, 16781313, 8320, 1048832, 16781313, 8320, 1048832, 16781313, 9216, 1145324612, 1145324612, 9216, 1145324612, 1145324612, 9216, 1145324612, 1145324612, 9216, 1145324612, 1145324612, 9216, 1145324612, 1145324612, 9216, 
1145324612, 1145324612, 9216, 1145324612, 1145324612, 9216, 1145324612, 1145324612, 9216, 1145324612, 1145324612, 9216, 1145324612, 1145324612, 9216, 1145324612, 1145324612, 9216, 1145324612, 1145324612, 9216, 1145324612, 1145324612, 9216, 1145324612, 1145324612, 9216, 1145324612, 1145324612, 9216, 1145324612, 1145324612, 9664, 559240, 0, 9664, 559240, 0, 9664, 559240, 0, 9664, 559240, 0, 9664, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437292979563887_221_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437292979563887_221_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0bafca26 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437292979563887_221_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,228 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 32)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } else { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 63))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + if ((WaveGetLaneIndex() == 25)) { + if ((WaveGetLaneIndex() == 38)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 20))) { + result = (result 
+ WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((210 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((229 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 228 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + 
Data: [3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3776, 0, 4294967295, 3392, 2048, 0, 3136, 63, 0, 3136, 63, 0, 3136, 63, 0, 3136, 63, 0, 3136, 63, 0, 3136, 63, 0, 2880, 17039872, 0, 2880, 17039872, 0, 2880, 17039872, 0, 4416, 73, 0, 4416, 73, 0, 4416, 73, 0, 5568, 2, 0, 6912, 2, 0, 10688, 65536, 0, 11856, 2340, 1226833920, 11856, 2340, 1226833920, 11856, 2340, 1226833920, 11856, 2340, 1226833920, 11856, 2340, 1226833920, 11856, 2340, 1226833920, 11856, 2340, 1226833920, 11856, 2340, 1226833920, 11872, 2340, 1226833920, 11872, 2340, 1226833920, 11872, 2340, 1226833920, 11872, 2340, 1226833920, 11872, 2340, 1226833920, 11872, 2340, 1226833920, 11872, 2340, 1226833920, 11872, 2340, 1226833920, 13456, 0, 4096, 13460, 0, 4096, 13472, 0, 4096, 13476, 0, 4096, 14672, 131072, 2097152, 14672, 131072, 2097152, 14676, 131072, 2097152, 14676, 131072, 2097152, 14688, 131072, 2097152, 14688, 131072, 2097152, 14692, 131072, 2097152, 14692, 131072, 2097152] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437298064985399_222_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437298064985399_222_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..464ddeb4 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437298064985399_222_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,251 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 37)) { + if ((WaveGetLaneIndex() < 24)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 40))) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 
<< 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6400, 0, 286330880, 6400, 0, 286330880, 6400, 0, 286330880, 6400, 0, 286330880, 6400, 0, 286330880, 8000, 1, 16, 8000, 1, 16, 9232, 286331153, 17, 9232, 286331153, 17, 9232, 286331153, 17, 9232, 286331153, 17, 9232, 286331153, 17, 9232, 286331153, 17, 9232, 286331153, 17, 9232, 286331153, 17, 9232, 286331153, 17, 9232, 286331153, 17, 9248, 286331153, 17, 9248, 286331153, 17, 9248, 286331153, 17, 9248, 286331153, 17, 9248, 286331153, 17, 9248, 286331153, 17, 9248, 286331153, 17, 9248, 286331153, 17, 9248, 286331153, 17, 9248, 286331153, 17, 13696, 1145324612, 1145324612, 13696, 1145324612, 1145324612, 13696, 
1145324612, 1145324612, 13696, 1145324612, 1145324612, 13696, 1145324612, 1145324612, 13696, 1145324612, 1145324612, 13696, 1145324612, 1145324612, 13696, 1145324612, 1145324612, 13696, 1145324612, 1145324612, 13696, 1145324612, 1145324612, 13696, 1145324612, 1145324612, 13696, 1145324612, 1145324612, 13696, 1145324612, 1145324612, 13696, 1145324612, 1145324612, 13696, 1145324612, 1145324612, 13696, 1145324612, 1145324612, 14144, 559240, 0, 14144, 559240, 0, 14144, 559240, 0, 14144, 559240, 0, 14144, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437300822706879_224_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437300822706879_224_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6d2ee677 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437300822706879_224_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,228 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 61))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } 
+ case 1: { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 57))) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((137 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 34)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3904, 34, 570425344, 3904, 34, 570425344, 3904, 34, 570425344, 3904, 34, 570425344, 6592, 34, 570425344, 6592, 34, 570425344, 6592, 34, 570425344, 6592, 34, 570425344, 8784, 67108864, 0, 8788, 67108864, 0, 8800, 67108864, 0, 8804, 67108864, 0, 10240, 838860, 0, 10240, 838860, 0, 10240, 838860, 0, 10240, 838860, 0, 10240, 838860, 0, 10240, 838860, 0, 10240, 838860, 0, 10240, 838860, 0, 10240, 838860, 0, 10240, 838860, 0, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765, 11392, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437448163402726_227_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437448163402726_227_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7641b783 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437448163402726_227_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,77 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + 
- Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 384 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 
1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1856, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765, 1872, 1431655765, 1431655765] + - 
Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437449188459719_228_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437449188459719_228_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..419f7fbf --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437449188459719_228_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,425 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 46))) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 51)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 56)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 42))) { + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((145 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + continue; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 
0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 48))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((218 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((262 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((281 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((291 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter2 == 1)) { + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 59))) { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 50)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((319 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 51))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (330 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (339 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (344 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (354 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 35))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (388 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 34)) || 
(WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((415 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((430 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (434 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 369 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1728, 27, 2684354560, 1728, 27, 2684354560, 1728, 27, 2684354560, 1728, 27, 2684354560, 1728, 27, 2684354560, 1728, 27, 2684354560, 3840, 17, 269549568, 3840, 17, 269549568, 3840, 17, 269549568, 3840, 17, 269549568, 3840, 17, 269549568, 5760, 8, 0, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 
2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7104, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7120, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 
2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 7136, 2863311530, 2863311530, 15120, 341, 0, 15120, 341, 0, 15120, 341, 0, 15120, 341, 0, 15120, 341, 0, 16788, 1024, 0, 18004, 65536, 1048576, 18004, 65536, 1048576, 18640, 21, 0, 18640, 21, 0, 18640, 21, 0, 22656, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437533162544221_230_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437533162544221_230_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f242c143 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437533162544221_230_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,421 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 13))) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 59))) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 62))) { + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 33)) || 
(WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 52)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 8)) { + 
result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 60)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((244 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 54))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 41))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (295 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (304 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((343 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((361 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((370 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 2)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 63))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (391 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() == 52)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((408 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((415 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 2)) { + break; + } + } + } + break; + } + case 2: { + for (uint i7 = 0; (i7 < 2); i7 = (i7 + 1)) { + uint counter8 = 0; + while ((counter8 < 3)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((444 << 6) | (i7 
<< 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((451 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 291 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4480, 8192, 0, 20352, 272696336, 68174084, 20352, 272696336, 68174084, 20352, 272696336, 68174084, 20352, 272696336, 68174084, 20352, 272696336, 68174084, 20352, 272696336, 68174084, 20352, 272696336, 68174084, 20352, 272696336, 68174084, 20352, 272696336, 68174084, 20352, 272696336, 68174084, 28420, 149796, 1227132928, 28420, 149796, 1227132928, 28420, 149796, 1227132928, 28420, 149796, 1227132928, 28420, 149796, 1227132928, 28420, 149796, 1227132928, 28420, 149796, 1227132928, 28420, 149796, 1227132928, 28420, 149796, 1227132928, 28420, 149796, 1227132928, 28420, 149796, 1227132928, 28420, 149796, 1227132928, 28420, 149796, 1227132928, 28424, 149796, 1227132928, 28424, 149796, 1227132928, 28424, 149796, 1227132928, 28424, 149796, 1227132928, 28424, 149796, 1227132928, 28424, 149796, 1227132928, 28424, 149796, 1227132928, 28424, 149796, 1227132928, 28424, 149796, 1227132928, 28424, 149796, 1227132928, 28424, 149796, 1227132928, 28424, 149796, 1227132928, 28424, 149796, 1227132928, 28428, 149796, 1227132928, 28428, 149796, 1227132928, 28428, 149796, 1227132928, 28428, 149796, 1227132928, 28428, 149796, 1227132928, 28428, 
149796, 1227132928, 28428, 149796, 1227132928, 28428, 149796, 1227132928, 28428, 149796, 1227132928, 28428, 149796, 1227132928, 28428, 149796, 1227132928, 28428, 149796, 1227132928, 28428, 149796, 1227132928, 28436, 149796, 1227132928, 28436, 149796, 1227132928, 28436, 149796, 1227132928, 28436, 149796, 1227132928, 28436, 149796, 1227132928, 28436, 149796, 1227132928, 28436, 149796, 1227132928, 28436, 149796, 1227132928, 28436, 149796, 1227132928, 28436, 149796, 1227132928, 28436, 149796, 1227132928, 28436, 149796, 1227132928, 28436, 149796, 1227132928, 28440, 149796, 1227132928, 28440, 149796, 1227132928, 28440, 149796, 1227132928, 28440, 149796, 1227132928, 28440, 149796, 1227132928, 28440, 149796, 1227132928, 28440, 149796, 1227132928, 28440, 149796, 1227132928, 28440, 149796, 1227132928, 28440, 149796, 1227132928, 28440, 149796, 1227132928, 28440, 149796, 1227132928, 28440, 149796, 1227132928, 28444, 149796, 1227132928, 28444, 149796, 1227132928, 28444, 149796, 1227132928, 28444, 149796, 1227132928, 28444, 149796, 1227132928, 28444, 149796, 1227132928, 28444, 149796, 1227132928, 28444, 149796, 1227132928, 28444, 149796, 1227132928, 28444, 149796, 1227132928, 28444, 149796, 1227132928, 28444, 149796, 1227132928, 28444, 149796, 1227132928, 28864, 36, 0, 28864, 36, 0, 28880, 36, 0, 28880, 36, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437541515248397_232_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437541515248397_232_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..81d75e68 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437541515248397_232_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,170 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: 
main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 165 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1536, 8738, 0, 1536, 8738, 0, 1536, 8738, 0, 1536, 8738, 0, 1552, 8738, 0, 1552, 8738, 0, 1552, 8738, 0, 1552, 8738, 0, 2112, 572662306, 0, 2112, 572662306, 0, 2112, 572662306, 0, 2112, 572662306, 0, 2112, 572662306, 0, 2112, 572662306, 0, 2112, 572662306, 0, 2112, 572662306, 0, 2128, 572662306, 0, 2128, 572662306, 0, 2128, 572662306, 0, 2128, 572662306, 0, 2128, 572662306, 0, 2128, 572662306, 0, 2128, 572662306, 0, 2128, 572662306, 0, 2432, 1145324612, 1145324612, 2432, 1145324612, 1145324612, 2432, 1145324612, 1145324612, 2432, 1145324612, 1145324612, 2432, 1145324612, 1145324612, 2432, 1145324612, 1145324612, 2432, 1145324612, 1145324612, 2432, 1145324612, 1145324612, 2432, 1145324612, 1145324612, 2432, 1145324612, 1145324612, 2432, 1145324612, 1145324612, 2432, 1145324612, 1145324612, 2432, 1145324612, 1145324612, 2432, 1145324612, 1145324612, 2432, 1145324612, 1145324612, 2432, 1145324612, 1145324612, 2880, 559240, 0, 2880, 559240, 0, 2880, 559240, 0, 2880, 559240, 0, 2880, 559240, 0, 3520, 85, 0, 3520, 85, 0, 3520, 85, 0, 3520, 85, 0, 4736, 85, 0, 4736, 85, 0, 4736, 85, 0, 4736, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437570522040653_234_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437570522040653_234_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e3e8ea07 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437570522040653_234_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,214 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 44)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 56))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - 
Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 268439552, 0, 1600, 268439552, 0, 1616, 268439552, 0, 1616, 268439552, 0, 10752, 559240, 0, 10752, 559240, 0, 10752, 559240, 0, 10752, 559240, 0, 10752, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437580730444293_236_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437580730444293_236_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a050d008 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437580730444293_236_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 50))) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4224, 21841, 1162870784, 4224, 21841, 
1162870784, 4224, 21841, 1162870784, 4224, 21841, 1162870784, 4224, 21841, 1162870784, 4224, 21841, 1162870784, 4224, 21841, 1162870784, 4224, 21841, 1162870784, 4224, 21841, 1162870784, 4224, 21841, 1162870784, 4224, 21841, 1162870784, 4224, 21841, 1162870784, 4240, 21841, 1162870784, 4240, 21841, 1162870784, 4240, 21841, 1162870784, 4240, 21841, 1162870784, 4240, 21841, 1162870784, 4240, 21841, 1162870784, 4240, 21841, 1162870784, 4240, 21841, 1162870784, 4240, 21841, 1162870784, 4240, 21841, 1162870784, 4240, 21841, 1162870784, 4240, 21841, 1162870784, 4928, 1, 0, 4944, 1, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437581831094736_237_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437581831094736_237_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b3182e5c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437581831094736_237_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437584138807446_239_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437584138807446_239_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b4072cd1 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437584138807446_239_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,204 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 49)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((101 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((131 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + } + break; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 49))) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
+} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1059 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 286331153, 286331153, 1344, 286331153, 286331153, 1344, 286331153, 286331153, 1344, 286331153, 286331153, 1344, 286331153, 286331153, 1344, 286331153, 286331153, 1344, 286331153, 286331153, 1344, 286331153, 286331153, 1344, 286331153, 286331153, 1344, 286331153, 286331153, 1344, 286331153, 286331153, 1344, 286331153, 286331153, 1344, 286331153, 286331153, 1344, 286331153, 286331153, 1344, 286331153, 286331153, 1344, 286331153, 286331153, 1360, 286331153, 286331153, 1360, 286331153, 286331153, 1360, 286331153, 286331153, 1360, 286331153, 286331153, 1360, 286331153, 286331153, 1360, 286331153, 286331153, 1360, 286331153, 286331153, 1360, 286331153, 286331153, 1360, 286331153, 286331153, 1360, 286331153, 286331153, 1360, 286331153, 286331153, 1360, 286331153, 286331153, 1360, 286331153, 286331153, 1360, 286331153, 286331153, 1360, 286331153, 286331153, 1360, 286331153, 286331153, 1376, 286331153, 286331153, 1376, 286331153, 286331153, 1376, 286331153, 286331153, 1376, 286331153, 286331153, 1376, 286331153, 286331153, 1376, 286331153, 286331153, 1376, 286331153, 286331153, 1376, 286331153, 286331153, 1376, 286331153, 286331153, 1376, 286331153, 286331153, 1376, 286331153, 286331153, 1376, 286331153, 286331153, 1376, 286331153, 286331153, 1376, 286331153, 286331153, 1376, 286331153, 286331153, 1376, 286331153, 286331153, 2432, 286331153, 286331153, 2432, 286331153, 286331153, 2432, 286331153, 286331153, 2432, 286331153, 286331153, 2432, 286331153, 286331153, 2432, 286331153, 286331153, 2432, 286331153, 286331153, 2432, 286331153, 286331153, 2432, 286331153, 286331153, 2432, 286331153, 286331153, 2432, 286331153, 286331153, 2432, 286331153, 286331153, 2432, 286331153, 286331153, 
2432, 286331153, 286331153, 2432, 286331153, 286331153, 2432, 286331153, 286331153, 2436, 286331153, 286331153, 2436, 286331153, 286331153, 2436, 286331153, 286331153, 2436, 286331153, 286331153, 2436, 286331153, 286331153, 2436, 286331153, 286331153, 2436, 286331153, 286331153, 2436, 286331153, 286331153, 2436, 286331153, 286331153, 2436, 286331153, 286331153, 2436, 286331153, 286331153, 2436, 286331153, 286331153, 2436, 286331153, 286331153, 2436, 286331153, 286331153, 2436, 286331153, 286331153, 2436, 286331153, 286331153, 2440, 286331153, 286331153, 2440, 286331153, 286331153, 2440, 286331153, 286331153, 2440, 286331153, 286331153, 2440, 286331153, 286331153, 2440, 286331153, 286331153, 2440, 286331153, 286331153, 2440, 286331153, 286331153, 2440, 286331153, 286331153, 2440, 286331153, 286331153, 2440, 286331153, 286331153, 2440, 286331153, 286331153, 2440, 286331153, 286331153, 2440, 286331153, 286331153, 2440, 286331153, 286331153, 2440, 286331153, 286331153, 2448, 286331153, 286331153, 2448, 286331153, 286331153, 2448, 286331153, 286331153, 2448, 286331153, 286331153, 2448, 286331153, 286331153, 2448, 286331153, 286331153, 2448, 286331153, 286331153, 2448, 286331153, 286331153, 2448, 286331153, 286331153, 2448, 286331153, 286331153, 2448, 286331153, 286331153, 2448, 286331153, 286331153, 2448, 286331153, 286331153, 2448, 286331153, 286331153, 2448, 286331153, 286331153, 2448, 286331153, 286331153, 2452, 286331153, 286331153, 2452, 286331153, 286331153, 2452, 286331153, 286331153, 2452, 286331153, 286331153, 2452, 286331153, 286331153, 2452, 286331153, 286331153, 2452, 286331153, 286331153, 2452, 286331153, 286331153, 2452, 286331153, 286331153, 2452, 286331153, 286331153, 2452, 286331153, 286331153, 2452, 286331153, 286331153, 2452, 286331153, 286331153, 2452, 286331153, 286331153, 2452, 286331153, 286331153, 2452, 286331153, 286331153, 2456, 286331153, 286331153, 2456, 286331153, 286331153, 2456, 286331153, 286331153, 2456, 286331153, 286331153, 2456, 
286331153, 286331153, 2456, 286331153, 286331153, 2456, 286331153, 286331153, 2456, 286331153, 286331153, 2456, 286331153, 286331153, 2456, 286331153, 286331153, 2456, 286331153, 286331153, 2456, 286331153, 286331153, 2456, 286331153, 286331153, 2456, 286331153, 286331153, 2456, 286331153, 286331153, 2456, 286331153, 286331153, 2464, 286331153, 286331153, 2464, 286331153, 286331153, 2464, 286331153, 286331153, 2464, 286331153, 286331153, 2464, 286331153, 286331153, 2464, 286331153, 286331153, 2464, 286331153, 286331153, 2464, 286331153, 286331153, 2464, 286331153, 286331153, 2464, 286331153, 286331153, 2464, 286331153, 286331153, 2464, 286331153, 286331153, 2464, 286331153, 286331153, 2464, 286331153, 286331153, 2464, 286331153, 286331153, 2464, 286331153, 286331153, 2468, 286331153, 286331153, 2468, 286331153, 286331153, 2468, 286331153, 286331153, 2468, 286331153, 286331153, 2468, 286331153, 286331153, 2468, 286331153, 286331153, 2468, 286331153, 286331153, 2468, 286331153, 286331153, 2468, 286331153, 286331153, 2468, 286331153, 286331153, 2468, 286331153, 286331153, 2468, 286331153, 286331153, 2468, 286331153, 286331153, 2468, 286331153, 286331153, 2468, 286331153, 286331153, 2468, 286331153, 286331153, 2472, 286331153, 286331153, 2472, 286331153, 286331153, 2472, 286331153, 286331153, 2472, 286331153, 286331153, 2472, 286331153, 286331153, 2472, 286331153, 286331153, 2472, 286331153, 286331153, 2472, 286331153, 286331153, 2472, 286331153, 286331153, 2472, 286331153, 286331153, 2472, 286331153, 286331153, 2472, 286331153, 286331153, 2472, 286331153, 286331153, 2472, 286331153, 286331153, 2472, 286331153, 286331153, 2472, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 
286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3024, 286331153, 286331153, 3024, 286331153, 286331153, 3024, 286331153, 286331153, 3024, 286331153, 286331153, 3024, 286331153, 286331153, 3024, 286331153, 286331153, 3024, 286331153, 286331153, 3024, 286331153, 286331153, 3024, 286331153, 286331153, 3024, 286331153, 286331153, 3024, 286331153, 286331153, 3024, 286331153, 286331153, 3024, 286331153, 286331153, 3024, 286331153, 286331153, 3024, 286331153, 286331153, 3024, 286331153, 286331153, 3040, 286331153, 286331153, 3040, 286331153, 286331153, 3040, 286331153, 286331153, 3040, 286331153, 286331153, 3040, 286331153, 286331153, 3040, 286331153, 286331153, 3040, 286331153, 286331153, 3040, 286331153, 286331153, 3040, 286331153, 286331153, 3040, 286331153, 286331153, 3040, 286331153, 286331153, 3040, 286331153, 286331153, 3040, 286331153, 286331153, 3040, 286331153, 286331153, 3040, 286331153, 286331153, 3040, 286331153, 286331153, 4560, 2, 0, 4576, 2, 0, 4592, 2, 0, 5136, 0, 572653568, 5136, 0, 572653568, 5136, 0, 572653568, 5136, 0, 572653568, 5152, 0, 572653568, 5152, 0, 572653568, 5152, 0, 572653568, 5152, 0, 572653568, 5168, 0, 572653568, 5168, 0, 572653568, 5168, 0, 572653568, 5168, 0, 572653568, 5440, 1145324612, 1145324612, 5440, 1145324612, 1145324612, 5440, 1145324612, 1145324612, 5440, 1145324612, 1145324612, 5440, 1145324612, 1145324612, 5440, 1145324612, 1145324612, 5440, 1145324612, 1145324612, 5440, 1145324612, 1145324612, 5440, 1145324612, 1145324612, 5440, 1145324612, 1145324612, 5440, 1145324612, 1145324612, 5440, 1145324612, 1145324612, 5440, 1145324612, 1145324612, 5440, 1145324612, 1145324612, 5440, 1145324612, 1145324612, 5440, 1145324612, 1145324612, 6480, 2290649224, 2290649224, 6480, 2290649224, 2290649224, 6480, 2290649224, 2290649224, 6480, 2290649224, 2290649224, 6480, 2290649224, 2290649224, 6480, 2290649224, 2290649224, 6480, 2290649224, 2290649224, 6480, 
2290649224, 2290649224, 6480, 2290649224, 2290649224, 6480, 2290649224, 2290649224, 6480, 2290649224, 2290649224, 6480, 2290649224, 2290649224, 6480, 2290649224, 2290649224, 6480, 2290649224, 2290649224, 6480, 2290649224, 2290649224, 6480, 2290649224, 2290649224, 6496, 2290649224, 2290649224, 6496, 2290649224, 2290649224, 6496, 2290649224, 2290649224, 6496, 2290649224, 2290649224, 6496, 2290649224, 2290649224, 6496, 2290649224, 2290649224, 6496, 2290649224, 2290649224, 6496, 2290649224, 2290649224, 6496, 2290649224, 2290649224, 6496, 2290649224, 2290649224, 6496, 2290649224, 2290649224, 6496, 2290649224, 2290649224, 6496, 2290649224, 2290649224, 6496, 2290649224, 2290649224, 6496, 2290649224, 2290649224, 6496, 2290649224, 2290649224, 6512, 2290649224, 2290649224, 6512, 2290649224, 2290649224, 6512, 2290649224, 2290649224, 6512, 2290649224, 2290649224, 6512, 2290649224, 2290649224, 6512, 2290649224, 2290649224, 6512, 2290649224, 2290649224, 6512, 2290649224, 2290649224, 6512, 2290649224, 2290649224, 6512, 2290649224, 2290649224, 6512, 2290649224, 2290649224, 6512, 2290649224, 2290649224, 6512, 2290649224, 2290649224, 6512, 2290649224, 2290649224, 6512, 2290649224, 2290649224, 6512, 2290649224, 2290649224, 10624, 1048704, 266240, 10624, 1048704, 266240, 10624, 1048704, 266240, 10624, 1048704, 266240, 10368, 15, 4278190080, 10368, 15, 4278190080, 10368, 15, 4278190080, 10368, 15, 4278190080, 10368, 15, 4278190080, 10368, 15, 4278190080, 10368, 15, 4278190080, 10368, 15, 4278190080, 10368, 15, 4278190080, 10368, 15, 4278190080, 10368, 15, 4278190080, 10368, 15, 4278190080, 11776, 32767, 3758096384, 11776, 32767, 3758096384, 11776, 32767, 3758096384, 11776, 32767, 3758096384, 11776, 32767, 3758096384, 11776, 32767, 3758096384, 11776, 32767, 3758096384, 11776, 32767, 3758096384, 11776, 32767, 3758096384, 11776, 32767, 3758096384, 11776, 32767, 3758096384, 11776, 32767, 3758096384, 11776, 32767, 3758096384, 11776, 32767, 3758096384, 11776, 32767, 3758096384, 11776, 32767, 
3758096384, 11776, 32767, 3758096384, 11776, 32767, 3758096384] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437625650942436_240_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437625650942436_240_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a01cdc3b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437625650942436_240_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,74 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((20 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1300, 16, 0, 1304, 16, 0, 1316, 16, 0, 1320, 16, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437636065197950_242_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437636065197950_242_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7630aa90 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437636065197950_242_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,182 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 60))) { + if (((((WaveGetLaneIndex() == 8) || 
(WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 7952, 2340, 1224736768, 7952, 2340, 1224736768, 7952, 2340, 1224736768, 7952, 2340, 1224736768, 7952, 2340, 1224736768, 7952, 2340, 1224736768, 7952, 2340, 1224736768, 7968, 2340, 1224736768, 7968, 2340, 1224736768, 7968, 2340, 1224736768, 7968, 2340, 1224736768, 7968, 2340, 1224736768, 7968, 2340, 1224736768, 
7968, 2340, 1224736768, 7984, 2340, 1224736768, 7984, 2340, 1224736768, 7984, 2340, 1224736768, 7984, 2340, 1224736768, 7984, 2340, 1224736768, 7984, 2340, 1224736768, 7984, 2340, 1224736768] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437636747611689_243_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437636747611689_243_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4c7113ce --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437636747611689_243_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,176 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 50))) { + if ((((WaveGetLaneIndex() == 13) || 
(WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 6528, 2181570690, 8521760, 6528, 2181570690, 8521760, 6528, 2181570690, 8521760, 6528, 2181570690, 8521760, 6528, 2181570690, 8521760, 6528, 2181570690, 8521760, 6528, 2181570690, 8521760, 6528, 2181570690, 8521760, 6528, 2181570690, 8521760, 6528, 2181570690, 8521760, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 
613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513, 6848, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437637245242823_244_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437637245242823_244_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ba6976f1 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437637245242823_244_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,209 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 31))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((80 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((97 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 43)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((159 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((170 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((188 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 468 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [8000, 272696336, 68174084, 8000, 272696336, 68174084, 8000, 272696336, 68174084, 8000, 272696336, 68174084, 8000, 272696336, 68174084, 8000, 272696336, 68174084, 8000, 272696336, 68174084, 8000, 272696336, 68174084, 8000, 272696336, 68174084, 8000, 272696336, 68174084, 10180, 0, 1158959104, 10180, 0, 1158959104, 10180, 0, 1158959104, 10180, 0, 1158959104, 10180, 0, 1158959104, 10180, 0, 1158959104, 10180, 0, 1158959104, 10184, 0, 1158959104, 10184, 0, 1158959104, 
10184, 0, 1158959104, 10184, 0, 1158959104, 10184, 0, 1158959104, 10184, 0, 1158959104, 10184, 0, 1158959104, 10196, 0, 1158959104, 10196, 0, 1158959104, 10196, 0, 1158959104, 10196, 0, 1158959104, 10196, 0, 1158959104, 10196, 0, 1158959104, 10196, 0, 1158959104, 10200, 0, 1158959104, 10200, 0, 1158959104, 10200, 0, 1158959104, 10200, 0, 1158959104, 10200, 0, 1158959104, 10200, 0, 1158959104, 10200, 0, 1158959104, 10884, 0, 1158959104, 10884, 0, 1158959104, 10884, 0, 1158959104, 10884, 0, 1158959104, 10884, 0, 1158959104, 10884, 0, 1158959104, 10884, 0, 1158959104, 10888, 0, 1158959104, 10888, 0, 1158959104, 10888, 0, 1158959104, 10888, 0, 1158959104, 10888, 0, 1158959104, 10888, 0, 1158959104, 10888, 0, 1158959104, 10900, 0, 1158959104, 10900, 0, 1158959104, 10900, 0, 1158959104, 10900, 0, 1158959104, 10900, 0, 1158959104, 10900, 0, 1158959104, 10900, 0, 1158959104, 10904, 0, 1158959104, 10904, 0, 1158959104, 10904, 0, 1158959104, 10904, 0, 1158959104, 10904, 0, 1158959104, 10904, 0, 1158959104, 10904, 0, 1158959104, 12032, 1797558, 0, 12032, 1797558, 0, 12032, 1797558, 0, 12032, 1797558, 0, 12032, 1797558, 0, 12032, 1797558, 0, 12032, 1797558, 0, 12032, 1797558, 0, 12032, 1797558, 0, 12032, 1797558, 0, 12032, 1797558, 0, 12032, 1797558, 0, 12032, 1797558, 0, 12032, 1797558, 0, 12048, 1797558, 0, 12048, 1797558, 0, 12048, 1797558, 0, 12048, 1797558, 0, 12048, 1797558, 0, 12048, 1797558, 0, 12048, 1797558, 0, 12048, 1797558, 0, 12048, 1797558, 0, 12048, 1797558, 0, 12048, 1797558, 0, 12048, 1797558, 0, 12048, 1797558, 0, 12048, 1797558, 0, 12736, 3510, 0, 12736, 3510, 0, 12736, 3510, 0, 12736, 3510, 0, 12736, 3510, 0, 12736, 3510, 0, 12736, 3510, 0, 12736, 3510, 0, 12752, 3510, 0, 12752, 3510, 0, 12752, 3510, 0, 12752, 3510, 0, 12752, 3510, 0, 12752, 3510, 0, 12752, 3510, 0, 12752, 3510, 0, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 
1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13440, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392, 13456, 11702, 1840699392] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437716112452083_246_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437716112452083_246_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..21008256 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437716112452083_246_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,138 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 63)) { + if 
((WaveGetLaneIndex() >= 32)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 183 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 
272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 2576, 0, 262144, 2592, 0, 262144, 3792, 68174084, 1090785345, 3792, 68174084, 1090785345, 3792, 68174084, 1090785345, 3792, 68174084, 1090785345, 3792, 68174084, 1090785345, 3792, 68174084, 1090785345, 3792, 68174084, 1090785345, 3792, 68174084, 1090785345, 3792, 68174084, 1090785345, 3792, 68174084, 1090785345, 3792, 68174084, 1090785345, 3808, 68174084, 1090785345, 3808, 68174084, 1090785345, 3808, 68174084, 1090785345, 3808, 68174084, 1090785345, 3808, 68174084, 1090785345, 3808, 68174084, 1090785345, 3808, 68174084, 1090785345, 3808, 68174084, 1090785345, 3808, 68174084, 1090785345, 3808, 68174084, 1090785345, 3808, 68174084, 1090785345, 4368, 68174084, 1090785345, 4368, 68174084, 1090785345, 4368, 68174084, 1090785345, 4368, 68174084, 1090785345, 4368, 68174084, 1090785345, 4368, 68174084, 1090785345, 4368, 68174084, 1090785345, 4368, 68174084, 1090785345, 4368, 68174084, 1090785345, 4368, 68174084, 1090785345, 4368, 68174084, 1090785345, 4384, 68174084, 1090785345, 4384, 68174084, 1090785345, 4384, 68174084, 1090785345, 4384, 68174084, 1090785345, 4384, 68174084, 1090785345, 4384, 68174084, 1090785345, 4384, 68174084, 1090785345, 4384, 68174084, 1090785345, 4384, 68174084, 1090785345, 4384, 68174084, 1090785345, 4384, 68174084, 1090785345, 5712, 0, 2097152, 5728, 0, 2097152] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437717237263363_247_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437717237263363_247_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..29f5e5f4 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437717237263363_247_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,396 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 53))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(((58 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 61)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 38)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | 
(i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 53)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 52)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 60)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + 
result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 47))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 43))) { + if (((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (294 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 273, 0, 1040, 273, 0, 1040, 273, 0, 1056, 273, 0, 1056, 273, 0, 1056, 273, 0, 3732, 1, 285212672, 3732, 1, 285212672, 3732, 1, 285212672, 3736, 1, 285212672, 3736, 1, 285212672, 3736, 1, 285212672, 3740, 1, 285212672, 3740, 1, 285212672, 3740, 1, 285212672, 3748, 1, 285212672, 3748, 1, 285212672, 3748, 1, 285212672, 3752, 1, 285212672, 3752, 1, 285212672, 3752, 1, 285212672, 3756, 1, 285212672, 3756, 1, 285212672, 3756, 1, 285212672, 4436, 
17, 285212672, 4436, 17, 285212672, 4436, 17, 285212672, 4436, 17, 285212672, 4440, 17, 285212672, 4440, 17, 285212672, 4440, 17, 285212672, 4440, 17, 285212672, 4444, 17, 285212672, 4444, 17, 285212672, 4444, 17, 285212672, 4444, 17, 285212672, 4452, 17, 285212672, 4452, 17, 285212672, 4452, 17, 285212672, 4452, 17, 285212672, 4456, 17, 285212672, 4456, 17, 285212672, 4456, 17, 285212672, 4456, 17, 285212672, 4460, 17, 285212672, 4460, 17, 285212672, 4460, 17, 285212672, 4460, 17, 285212672, 9216, 0, 536870912, 9232, 0, 536870912, 9248, 0, 536870912, 9664, 0, 536870912, 9680, 0, 536870912, 9696, 0, 536870912, 12224, 537002016, 2097664, 12224, 537002016, 2097664, 12224, 537002016, 2097664, 12224, 537002016, 2097664, 12224, 537002016, 2097664, 16448, 559240, 0, 16448, 559240, 0, 16448, 559240, 0, 16448, 559240, 0, 16448, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437762644607360_248_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437762644607360_248_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..afa5aff5 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437762644607360_248_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,425 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 56)) || 
(WaveGetLaneIndex() == 25))) { + if (((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 54))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 54)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
else { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 47)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 36)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 50))) { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((210 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((229 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 53)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((238 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
+ case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (278 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (308 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (327 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 60))) { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (345 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (356 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (370 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((387 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + } else { + if ((WaveGetLaneIndex() >= 41)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (399 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((413 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (420 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 333 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 17, 0, 1792, 17, 0, 11328, 1145324612, 1145324612, 11328, 1145324612, 1145324612, 11328, 1145324612, 1145324612, 11328, 1145324612, 1145324612, 11328, 1145324612, 1145324612, 11328, 1145324612, 1145324612, 11328, 1145324612, 1145324612, 11328, 1145324612, 1145324612, 11328, 1145324612, 1145324612, 11328, 1145324612, 1145324612, 11328, 1145324612, 1145324612, 11328, 1145324612, 1145324612, 11328, 1145324612, 1145324612, 11328, 1145324612, 1145324612, 11328, 1145324612, 1145324612, 11328, 1145324612, 1145324612, 11344, 1145324612, 1145324612, 11344, 1145324612, 1145324612, 11344, 1145324612, 1145324612, 11344, 1145324612, 1145324612, 11344, 1145324612, 1145324612, 11344, 1145324612, 1145324612, 11344, 1145324612, 1145324612, 11344, 1145324612, 1145324612, 11344, 1145324612, 1145324612, 11344, 1145324612, 1145324612, 11344, 1145324612, 1145324612, 
11344, 1145324612, 1145324612, 11344, 1145324612, 1145324612, 11344, 1145324612, 1145324612, 11344, 1145324612, 1145324612, 11344, 1145324612, 1145324612, 15236, 0, 1717567488, 15236, 0, 1717567488, 15236, 0, 1717567488, 15236, 0, 1717567488, 15236, 0, 1717567488, 15236, 0, 1717567488, 15240, 0, 1717567488, 15240, 0, 1717567488, 15240, 0, 1717567488, 15240, 0, 1717567488, 15240, 0, 1717567488, 15240, 0, 1717567488, 15252, 0, 1717567488, 15252, 0, 1717567488, 15252, 0, 1717567488, 15252, 0, 1717567488, 15252, 0, 1717567488, 15252, 0, 1717567488, 15256, 0, 1717567488, 15256, 0, 1717567488, 15256, 0, 1717567488, 15256, 0, 1717567488, 15256, 0, 1717567488, 15256, 0, 1717567488, 15680, 559240, 0, 15680, 559240, 0, 15680, 559240, 0, 15680, 559240, 0, 15680, 559240, 0, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 16576, 2863311530, 2863311530, 19712, 65, 0, 19712, 65, 0, 20928, 272696336, 68174084, 20928, 272696336, 68174084, 20928, 272696336, 68174084, 20928, 272696336, 68174084, 20928, 272696336, 68174084, 20928, 272696336, 68174084, 20928, 272696336, 68174084, 20928, 
272696336, 68174084, 20928, 272696336, 68174084, 20928, 272696336, 68174084] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437823930639930_249_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437823930639930_249_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..63dcacdc --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437823930639930_249_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,139 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 55))) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((67 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((83 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 16))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1314 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1920, 2148564992, 33554560, 1920, 2148564992, 33554560, 1920, 2148564992, 33554560, 1920, 2148564992, 33554560, 1920, 2148564992, 33554560, 1936, 2148564992, 33554560, 1936, 2148564992, 33554560, 1936, 2148564992, 33554560, 1936, 2148564992, 33554560, 1936, 2148564992, 33554560, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3076, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 
262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3080, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3092, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3096, 262143, 4286578688, 3716, 73, 0, 3716, 73, 0, 3716, 73, 0, 3720, 73, 0, 3720, 73, 0, 3720, 73, 0, 3732, 73, 0, 3732, 73, 0, 3732, 73, 0, 3736, 73, 0, 3736, 73, 0, 3736, 73, 0, 4292, 272696336, 68174084, 4292, 272696336, 68174084, 4292, 272696336, 68174084, 4292, 272696336, 68174084, 4292, 272696336, 68174084, 4292, 272696336, 68174084, 4292, 272696336, 
68174084, 4292, 272696336, 68174084, 4292, 272696336, 68174084, 4292, 272696336, 68174084, 4296, 272696336, 68174084, 4296, 272696336, 68174084, 4296, 272696336, 68174084, 4296, 272696336, 68174084, 4296, 272696336, 68174084, 4296, 272696336, 68174084, 4296, 272696336, 68174084, 4296, 272696336, 68174084, 4296, 272696336, 68174084, 4296, 272696336, 68174084, 4308, 272696336, 68174084, 4308, 272696336, 68174084, 4308, 272696336, 68174084, 4308, 272696336, 68174084, 4308, 272696336, 68174084, 4308, 272696336, 68174084, 4308, 272696336, 68174084, 4308, 272696336, 68174084, 4308, 272696336, 68174084, 4308, 272696336, 68174084, 4312, 272696336, 68174084, 4312, 272696336, 68174084, 4312, 272696336, 68174084, 4312, 272696336, 68174084, 4312, 272696336, 68174084, 4312, 272696336, 68174084, 4312, 272696336, 68174084, 4312, 272696336, 68174084, 4312, 272696336, 68174084, 4312, 272696336, 68174084, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 
1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4612, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4616, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 
3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4628, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 4632, 3067833782, 1840700269, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 
5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5316, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5320, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5332, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 
4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 5336, 524287, 4026531840, 6720, 2147557376, 4096, 6720, 2147557376, 4096, 6720, 2147557376, 4096, 6720, 2147557376, 4096, 6736, 2147557376, 4096, 6736, 2147557376, 4096, 6736, 2147557376, 4096, 6736, 2147557376, 4096] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437905792401861_250_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437905792401861_250_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..924db3ff --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437905792401861_250_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,181 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 17))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + 
for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 201 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 6080, 572662306, 572662306, 6080, 572662306, 572662306, 6080, 572662306, 572662306, 6080, 572662306, 572662306, 6080, 572662306, 572662306, 6080, 572662306, 572662306, 6080, 572662306, 572662306, 6080, 572662306, 572662306, 6080, 572662306, 572662306, 6080, 572662306, 572662306, 6080, 572662306, 572662306, 6080, 572662306, 572662306, 6080, 572662306, 572662306, 6080, 572662306, 572662306, 6080, 572662306, 572662306, 6080, 572662306, 572662306, 6096, 572662306, 572662306, 6096, 572662306, 572662306, 6096, 572662306, 572662306, 6096, 572662306, 572662306, 6096, 572662306, 572662306, 6096, 572662306, 572662306, 6096, 572662306, 572662306, 6096, 572662306, 572662306, 6096, 572662306, 572662306, 6096, 572662306, 572662306, 6096, 572662306, 572662306, 6096, 572662306, 572662306, 6096, 
572662306, 572662306, 6096, 572662306, 572662306, 6096, 572662306, 572662306, 6096, 572662306, 572662306, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6848, 559240, 0, 6848, 559240, 0, 6848, 559240, 0, 6848, 559240, 0, 6848, 559240, 0, 7488, 85, 0, 7488, 85, 0, 7488, 85, 0, 7488, 85, 0, 9472, 134217760, 131104, 9472, 134217760, 131104, 9472, 134217760, 131104, 9472, 134217760, 131104, 9488, 134217760, 131104, 9488, 134217760, 131104, 9488, 134217760, 131104, 9488, 134217760, 131104] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437913266106674_251_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437913266106674_251_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d3b3e71d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437913266106674_251_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,157 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 55)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 59))) { + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 37)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 41))) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 111 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2240, 9, 2415919104, 2240, 9, 2415919104, 2240, 9, 2415919104, 2240, 9, 2415919104, 2256, 9, 2415919104, 2256, 9, 2415919104, 2256, 9, 2415919104, 2256, 9, 2415919104, 3328, 1, 268435456, 3328, 1, 268435456, 3332, 1, 268435456, 3332, 1, 268435456, 3344, 1, 268435456, 3344, 1, 268435456, 3348, 1, 268435456, 3348, 1, 268435456, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513, 9600, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437914418502177_252_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437914418502177_252_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..306aea8f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437914418502177_252_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,346 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 53)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 52)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 48))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | 
(counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 31)) { + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((191 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((208 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 30)) { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 19)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if 
(true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 54))) { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((286 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((295 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 2)) { + break; + } + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 291 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6912, 4369, 0, 6912, 4369, 0, 6912, 4369, 0, 6912, 4369, 0, 10688, 286331153, 0, 10688, 286331153, 0, 10688, 286331153, 0, 10688, 286331153, 0, 10688, 286331153, 0, 10688, 286331153, 0, 10688, 286331153, 0, 10688, 286331153, 0, 11600, 8738, 0, 11600, 8738, 0, 11600, 8738, 0, 11600, 8738, 0, 11616, 8738, 0, 11616, 8738, 0, 11616, 8738, 0, 11616, 8738, 0, 11632, 8738, 0, 11632, 8738, 0, 11632, 8738, 0, 11632, 8738, 0, 12240, 546, 0, 12240, 546, 0, 12240, 546, 0, 12256, 546, 0, 12256, 546, 0, 12256, 546, 0, 12272, 546, 0, 12272, 546, 0, 12272, 546, 0, 13328, 572662306, 0, 13328, 572662306, 0, 13328, 572662306, 0, 13328, 572662306, 0, 13328, 572662306, 0, 13328, 572662306, 0, 13328, 572662306, 0, 13328, 572662306, 0, 13332, 572662306, 0, 13332, 572662306, 0, 13332, 572662306, 0, 13332, 572662306, 0, 13332, 572662306, 0, 13332, 572662306, 0, 13332, 572662306, 0, 13332, 572662306, 0, 13344, 572662306, 0, 13344, 572662306, 0, 13344, 572662306, 0, 13344, 572662306, 0, 13344, 572662306, 0, 13344, 572662306, 0, 13344, 572662306, 0, 13344, 572662306, 0, 13348, 572662306, 0, 13348, 572662306, 0, 13348, 572662306, 0, 13348, 572662306, 0, 13348, 572662306, 0, 13348, 572662306, 0, 13348, 572662306, 0, 13348, 572662306, 0, 13360, 572662306, 0, 13360, 572662306, 0, 13360, 572662306, 0, 13360, 572662306, 0, 13360, 572662306, 0, 13360, 572662306, 0, 13360, 572662306, 0, 13360, 572662306, 0, 13364, 572662306, 0, 13364, 572662306, 0, 13364, 572662306, 0, 13364, 572662306, 0, 13364, 572662306, 0, 13364, 572662306, 0, 13364, 572662306, 0, 13364, 572662306, 0, 15168, 1145324612, 1145324612, 15168, 1145324612, 1145324612, 15168, 1145324612, 1145324612, 15168, 1145324612, 
1145324612, 15168, 1145324612, 1145324612, 15168, 1145324612, 1145324612, 15168, 1145324612, 1145324612, 15168, 1145324612, 1145324612, 15168, 1145324612, 1145324612, 15168, 1145324612, 1145324612, 15168, 1145324612, 1145324612, 15168, 1145324612, 1145324612, 15168, 1145324612, 1145324612, 15168, 1145324612, 1145324612, 15168, 1145324612, 1145324612, 15168, 1145324612, 1145324612] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756437920717755784_253_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756437920717755784_253_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..67cf6b8d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756437920717755784_253_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,385 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 
<< 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 25))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((212 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 39)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((274 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((283 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 61))) { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (304 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((334 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 47))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((353 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((372 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((383 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i7 == 1)) { + continue; + } + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 17)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (399 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (408 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (413 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (420 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (424 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 261 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3600, 1048580, 0, 3600, 1048580, 0, 3616, 1048580, 0, 3616, 1048580, 0, 3632, 1048580, 0, 3632, 1048580, 0, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 
1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 5824, 1431655765, 1431655765, 9744, 0, 8388608, 9760, 0, 8388608, 9776, 0, 8388608, 14288, 0, 524288, 14304, 0, 524288, 14320, 0, 524288, 15376, 2097280, 0, 15376, 2097280, 0, 15392, 2097280, 0, 15392, 2097280, 0, 15408, 2097280, 0, 15408, 2097280, 0, 19456, 2, 536870912, 19456, 2, 536870912, 21392, 2, 0, 21408, 2, 0, 21424, 2, 0, 23808, 0, 35651584, 23808, 0, 35651584, 23824, 0, 35651584, 23824, 0, 35651584, 23840, 0, 35651584, 23840, 0, 35651584, 24512, 139808, 35782656, 24512, 139808, 35782656, 24512, 139808, 35782656, 24512, 139808, 35782656, 24512, 139808, 35782656, 24512, 139808, 35782656, 24512, 139808, 35782656, 24528, 139808, 35782656, 24528, 139808, 35782656, 24528, 139808, 35782656, 24528, 139808, 35782656, 24528, 139808, 35782656, 24528, 139808, 35782656, 24528, 139808, 35782656, 24544, 139808, 35782656, 24544, 139808, 35782656, 24544, 139808, 35782656, 24544, 139808, 35782656, 24544, 139808, 35782656, 24544, 139808, 35782656, 24544, 139808, 35782656, 26880, 559240, 0, 26880, 559240, 0, 26880, 559240, 0, 26880, 559240, 0, 26880, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438088341845436_254_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438088341845436_254_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b3182e5c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438088341845436_254_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize64BitTracking/tests/program_1756438093559011814_256_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438093559011814_256_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..d11a3764
--- /dev/null
+++ b/test/WaveSize64BitTracking/tests/program_1756438093559011814_256_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,351 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // output records: 3 uints per executed wave op, per lane
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the shared write cursor into _participant_bit
+
+[numthreads(64, 1, 1)] // lane notes below assume the 64 threads form one wave (lanes 0..63), as the expected data does
+void main(uint3 tid : SV_DispatchThreadID) { // tid is unused; every branch is driven by WaveGetLaneIndex()
+  uint result = 0; // only feeds later wave ops; it is never stored to a buffer
+  switch ((WaveGetLaneIndex() % 4)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) { // lanes 0 and 4
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // reserve 3 slots; temp receives the cursor value before the add
+      _participant_bit[temp] = (9 << 6); // slot 0: op id in bits 6 and up (loop counters, when present, go in bits 4 and 2)
+      uint4 ballot = WaveActiveBallot(1); // mask of lanes active at this point
+      _participant_bit[(temp + 1)] = ballot.x; // slot 1: lanes 0..31
+      _participant_bit[(temp + 2)] = ballot.y; // slot 2: lanes 32..63
+    }
+    break;
+  }
+  case 1: {
+    if ((WaveGetLaneIndex() >= 47)) { // lanes 49, 53, 57, 61
+      if ((WaveGetLaneIndex() < 4)) { // cannot hold for lanes >= 47
+        result = (result + WaveActiveMin(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (19 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) {
+        if ((WaveGetLaneIndex() == 46)) { // lane 46 is excluded by the >= 47 guard
+          result = (result + WaveActiveMax(8));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((34 << 6) | (i0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((i0 == 1)) { // leaves the loop on what is already its final iteration
+          break;
+        }
+      }
+      if ((WaveGetLaneIndex() < 5)) { // cannot hold for lanes >= 47
+        result = (result + WaveActiveMax((WaveGetLaneIndex() + 1)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (46 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  case 2: {
+    uint counter1 = 0;
+    while ((counter1 < 2)) {
+      counter1 = (counter1 + 1); // incremented before use, so records carry counter1 = 1 and 2
+      for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) {
+        if ((WaveGetLaneIndex() == 50)) { // 50 % 4 == 2, so lane 50 records once per (counter1, i2) pair
+          result = (result + WaveActiveSum(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((68 << 6) | (counter1 << 4)) | (i2 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((WaveGetLaneIndex() == 36)) { // 36 % 4 == 0, so lane 36 never reaches this case
+          result = (result + WaveActiveMin((WaveGetLaneIndex() + 5)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((77 << 6) | (counter1 << 4)) | (i2 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((i2 == 1)) { // last statement of the loop body, so the continue changes nothing
+          continue;
+        }
+      }
+      if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 45))) { // only lane 54 has lane % 4 == 2; 45 is listed twice
+        result = (result + WaveActiveMax(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((103 << 6) | (counter1 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  case 3: {
+    for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) {
+      if ((WaveGetLaneIndex() < 9)) { // lanes 3 and 7
+        result = (result + WaveActiveMax(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((118 << 6) | (i3 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) {
+        if ((WaveGetLaneIndex() == 4)) { // 4 % 4 == 0, so lane 4 never reaches this case
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((133 << 6) | (i3 << 4)) | (i4 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((WaveGetLaneIndex() == 15)) { // 15 % 4 == 3, so lane 15 records once per (i3, i4) pair
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((140 << 6) | (i3 << 4)) | (i4 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+    }
+    break;
+  }
+  }
+  switch ((WaveGetLaneIndex() % 4)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) { // lanes 0 and 4
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (150 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 1: {
+    if ((WaveGetLaneIndex() == 27)) { // 27 % 4 == 3, so nothing under this guard runs in case 1
+      if ((WaveGetLaneIndex() == 41)) {
+        result = (result + WaveActiveMax(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (160 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if ((WaveGetLaneIndex() == 22)) {
+        if ((WaveGetLaneIndex() == 10)) {
+          result = (result + WaveActiveMax(7));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (170 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if ((WaveGetLaneIndex() == 59)) {
+        result = (result + WaveActiveMax((WaveGetLaneIndex() + 4)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (179 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  case 2: {
+    if (true) { // every lane with lane % 4 == 2 (16 lanes) records here
+      result = (result + WaveActiveSum(3));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (184 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 3: {
+    if (((WaveGetLaneIndex() & 1) == 0)) { // lanes with lane % 4 == 3 are odd, so this guard never passes
+      if (((WaveGetLaneIndex() & 1) == 0)) {
+        result = (result + WaveActiveMax(WaveGetLaneIndex()));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (198 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      switch ((WaveGetLaneIndex() % 3)) {
+      case 0: { // NOTE(review): no break, so control continues into case 1; this sits under the guard that never passes
+        if ((WaveGetLaneIndex() < 8)) {
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (208 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      case 1: {
+        if ((WaveGetLaneIndex() >= 49)) {
+          if ((WaveGetLaneIndex() >= 54)) {
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (218 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((WaveGetLaneIndex() < 20)) {
+            result = (result + WaveActiveMin(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (225 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        } else {
+          if (((WaveGetLaneIndex() & 1) == 1)) {
+            result = (result + WaveActiveMax(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (234 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if (((WaveGetLaneIndex() & 1) == 0)) {
+            result = (result + WaveActiveSum((WaveGetLaneIndex() + 1)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (245 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        break;
+      }
+      case 2: {
+        if (true) {
+          result = (result + WaveActiveSum(3));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (250 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      default: { // lane % 3 is always 0, 1 or 2, so this label is never selected
+        result = (result + WaveActiveSum(99));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (254 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+        break;
+      }
+      }
+      if (((WaveGetLaneIndex() & 1) == 1)) {
+        result = (result + WaveActiveMin(WaveGetLaneIndex()));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (263 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  default: { // lane % 4 is always 0..3, so this label is never selected
+    result = (result + WaveActiveSum(99));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp);
+    _participant_bit[temp] = (267 << 6);
+    uint4 ballot = WaveActiveBallot(1);
+    _participant_bit[(temp + 1)] = ballot.x;
+    _participant_bit[(temp + 2)] = ballot.y;
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 64 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 108
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [11776, 1145324612, 1145324612, 11776, 1145324612, 1145324612, 11776, 1145324612, 1145324612, 11776, 1145324612, 1145324612, 11776, 1145324612, 1145324612, 11776, 1145324612, 1145324612, 11776, 1145324612, 1145324612, 11776, 1145324612, 1145324612, 11776, 1145324612, 1145324612, 11776, 1145324612, 1145324612, 11776, 1145324612, 1145324612, 11776, 1145324612, 1145324612, 11776, 1145324612, 1145324612, 11776, 1145324612, 1145324612, 11776, 1145324612, 1145324612, 11776, 1145324612, 1145324612, 4368, 0, 262144, 4372, 0, 262144, 4384, 0, 262144, 4388, 0, 262144, 6608, 0, 4194304, 6624, 0, 4194304, 7552, 136, 0, 7552, 136, 0, 7568, 136, 0, 7568, 136, 0, 8960, 32768, 0, 8964, 32768, 0, 8968, 32768, 0, 8976, 32768, 0, 8980, 32768, 0, 8984, 32768, 0, 9600, 17, 0, 9600, 17, 0, 576, 17, 0, 576, 17, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3 # one record = op-id word + the two ballot words the shader writes
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize64BitTracking/tests/program_1756438114178778089_257_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438114178778089_257_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..5ec794ab
--- /dev/null
+++ b/test/WaveSize64BitTracking/tests/program_1756438114178778089_257_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,275 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // output records: 3 uints per executed wave op, per lane
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the shared write cursor into _participant_bit
+
+[numthreads(64, 1, 1)] // lane notes below assume the 64 threads form one wave (lanes 0..63), as the expected data does
+void main(uint3 tid : SV_DispatchThreadID) { // tid is unused; every branch is driven by WaveGetLaneIndex()
+  uint result = 0; // only feeds later wave ops; it is never stored to a buffer
+  switch ((WaveGetLaneIndex() % 3)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) { // lanes 0, 3 and 6
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // reserve 3 slots; temp receives the cursor value before the add
+      _participant_bit[temp] = (9 << 6); // slot 0: op id in bits 6 and up (a loop counter, when present, goes in bit 4 and up)
+      uint4 ballot = WaveActiveBallot(1); // mask of lanes active at this point
+      _participant_bit[(temp + 1)] = ballot.x; // slot 1: lanes 0..31
+      _participant_bit[(temp + 2)] = ballot.y; // slot 2: lanes 32..63
+    }
+    break;
+  }
+  case 1: {
+    if (((WaveGetLaneIndex() % 2) == 0)) { // even lanes with lane % 3 == 1: 4, 10, 16, ..., 58 (10 lanes)
+      result = (result + WaveActiveSum(2));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (18 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 2: {
+    if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 58))) { // none of 13, 27, 58 has lane % 3 == 2, so every lane of this case takes the else branch
+      switch ((WaveGetLaneIndex() % 2)) {
+      case 0: {
+        if ((WaveGetLaneIndex() < 8)) {
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (39 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 1: {
+        if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 17))) {
+          if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 52))) {
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (69 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 50))) {
+            result = (result + WaveActiveSum(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (92 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        break;
+      }
+      default: { // lane % 2 is always 0 or 1, so this label is never selected
+        result = (result + WaveActiveSum(99));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (96 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+        break;
+      }
+      }
+    } else {
+      if ((WaveGetLaneIndex() == 41)) { // 41 % 3 == 2, so lane 41 records alone
+        result = (result + WaveActiveMax(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (103 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      uint counter0 = 0;
+      while ((counter0 < 3)) {
+        counter0 = (counter0 + 1); // incremented before use, so records carry counter0 = 1, 2 and 3
+        if (((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 44))) { // both lanes have lane % 3 == 2
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((121 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+    }
+    break;
+  }
+  }
+  switch ((WaveGetLaneIndex() % 2)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) { // lanes 0, 2, 4 and 6
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (131 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 1: {
+    if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 52))) { // odd lanes only here, so just 11 and 23; no nested test below matches either
+      if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 61))) {
+        result = (result + WaveActiveMax(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (165 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 9))) {
+        if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 6))) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (195 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 56))) {
+        result = (result + WaveActiveSum(WaveGetLaneIndex()));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (214 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    } else {
+      switch ((WaveGetLaneIndex() % 4)) { // only odd lanes get here, so lane % 4 is 1 or 3
+      case 0: { // NOTE(review): no break, so control would continue into case 1; never selected by an odd lane
+        if ((WaveGetLaneIndex() < 8)) {
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (224 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      case 1: {
+        if (((WaveGetLaneIndex() % 2) == 0)) { // never true for the odd lanes that reach this switch
+          result = (result + WaveActiveSum(2));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (233 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 2: { // never selected by an odd lane
+        if (true) {
+          result = (result + WaveActiveSum(3));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (238 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 3: {
+        if ((WaveGetLaneIndex() < 20)) { // lanes 3, 7, 15 and 19 (lane 11 took the if branch above)
+          result = (result + WaveActiveSum(4));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (245 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      }
+      if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 59))) { // lanes 3 and 59 record together
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (256 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+    break;
+  }
+  default: { // lane % 2 is always 0 or 1, so this label is never selected
+    result = (result + WaveActiveSum(99));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp);
+    _participant_bit[temp] = (260 << 6);
+    uint4 ballot = WaveActiveBallot(1);
+    _participant_bit[(temp + 1)] = ballot.x;
+    _participant_bit[(temp + 2)] = ballot.y;
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 64 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 90
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 6592, 0, 512, 7760, 8388608, 4096, 7760, 8388608, 4096, 7776, 8388608, 4096, 7776, 8388608, 4096, 7792, 8388608, 4096, 7792, 8388608, 4096, 8384, 85, 0, 8384, 85, 0, 8384, 85, 0, 8384, 85, 0, 15680, 557192, 0, 15680, 557192, 0, 15680, 557192, 0, 15680, 557192, 0, 16384, 8, 134217728, 16384, 8, 134217728]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3 # one record = op-id word + the two ballot words the shader writes
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize64BitTracking/tests/program_1756438118471082189_258_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438118471082189_258_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..0b6fead5
--- /dev/null
+++ b/test/WaveSize64BitTracking/tests/program_1756438118471082189_258_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,155 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // output records: 3 uints per executed wave op, per lane
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the shared write cursor into _participant_bit
+
+[numthreads(64, 1, 1)] // lane notes below assume the 64 threads form one wave (lanes 0..63), as the expected data does
+void main(uint3 tid : SV_DispatchThreadID) { // tid is unused; every branch is driven by WaveGetLaneIndex()
+  uint result = 0; // only feeds later wave ops; it is never stored to a buffer
+  if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 5))) {
+    if ((WaveGetLaneIndex() < 30)) { // of lanes 0, 5, 40, 48 only 0 and 5 continue
+      if ((WaveGetLaneIndex() < 4)) { // lane 0 alone
+        result = (result + WaveActiveMin(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp); // reserve 3 slots; temp receives the cursor value before the add
+        _participant_bit[temp] = (24 << 6); // slot 0: op id in bits 6 and up (a loop counter, when present, goes in bit 4 and up)
+        uint4 ballot = WaveActiveBallot(1); // mask of lanes active at this point
+        _participant_bit[(temp + 1)] = ballot.x; // slot 1: lanes 0..31
+        _participant_bit[(temp + 2)] = ballot.y; // slot 2: lanes 32..63
+      }
+      for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) {
+        if ((WaveGetLaneIndex() < 21)) { // lanes 0 and 5, once per iteration
+          result = (result + WaveActiveSum(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((39 << 6) | (i0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((WaveGetLaneIndex() < 26)) { // lanes 0 and 5, once per iteration
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((46 << 6) | (i0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if ((WaveGetLaneIndex() < 8)) { // lanes 0 and 5
+        result = (result + WaveActiveMax((WaveGetLaneIndex() + 1)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (55 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+  }
+  if (((WaveGetLaneIndex() & 1) == 0)) { // the 32 even lanes
+    if (((WaveGetLaneIndex() & 1) == 0)) { // same test as the enclosing if, so all 32 even lanes record
+      result = (result + WaveActiveMax(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (69 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    switch ((WaveGetLaneIndex() % 2)) { // only even lanes get here, so lane % 2 is 0
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) { // lanes 0, 2, 4 and 6
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (79 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: { // never selected by an even lane
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (88 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    default: { // lane % 2 is always 0 or 1, so this label is never selected
+      result = (result + WaveActiveSum(99));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (92 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+      break;
+    }
+    }
+    if (((WaveGetLaneIndex() & 1) == 1)) { // contradicts the enclosing even-lane guard, so never true
+      result = (result + WaveActiveMin(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (101 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 64 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 141
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1536, 1, 0, 2496, 33, 0, 2496, 33, 0, 2512, 33, 0, 2512, 33, 0, 2944, 33, 0, 2944, 33, 0, 2960, 33, 0, 2960, 33, 0, 3520, 33, 0, 3520, 33, 0, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 4416, 1431655765, 1431655765, 5056, 85, 0, 5056, 85, 0, 5056, 85, 0, 5056, 85, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3 # one record = op-id word + the two ballot words the shader writes
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438119062678403_259_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438119062678403_259_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..58ee03fa --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438119062678403_259_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,395 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 44))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 25)) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 44))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((100 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + if ((WaveGetLaneIndex() == 43)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = ((141 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((163 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((WaveGetLaneIndex() < 20)) { + if ((WaveGetLaneIndex() >= 44)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() >= 54)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 63))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 62))) { + result = 
(result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 46)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((255 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 46))) { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 54))) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((292 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((315 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((326 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 2)) { + break; + } + } + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (345 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((359 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (368 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 38)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (375 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (385 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (394 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (398 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 42)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (405 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (412 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 294 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6416, 4096, 4096, 6416, 4096, 4096, 6420, 4096, 4096, 6420, 4096, 4096, 6432, 4096, 4096, 6432, 4096, 4096, 6436, 4096, 4096, 6436, 4096, 4096, 6448, 4096, 4096, 6448, 4096, 4096, 6452, 4096, 4096, 6452, 4096, 4096, 6992, 4096, 4096, 6992, 4096, 4096, 6996, 4096, 4096, 6996, 4096, 4096, 7008, 4096, 4096, 7008, 4096, 4096, 7012, 4096, 4096, 7012, 4096, 4096, 7024, 4096, 4096, 7024, 4096, 4096, 7028, 4096, 4096, 7028, 4096, 4096, 9040, 167772160, 134217728, 9040, 167772160, 134217728, 9040, 167772160, 134217728, 9056, 167772160, 134217728, 9056, 167772160, 134217728, 9056, 167772160, 134217728, 10452, 16777218, 262144, 10452, 16777218, 262144, 10452, 16777218, 262144, 10456, 16777218, 262144, 10456, 16777218, 262144, 10456, 16777218, 262144, 10468, 16777218, 262144, 10468, 16777218, 262144, 10468, 16777218, 262144, 10472, 16777218, 262144, 10472, 16777218, 262144, 10472, 16777218, 262144, 12864, 5, 0, 12864, 5, 0, 14016, 170, 0, 14016, 170, 0, 14016, 170, 0, 14016, 170, 0, 15104, 17, 0, 15104, 17, 0, 16320, 34, 572522496, 16320, 34, 572522496, 16320, 34, 572522496, 16320, 34, 572522496, 16320, 34, 572522496, 16336, 34, 572522496, 16336, 34, 572522496, 16336, 34, 572522496, 16336, 34, 572522496, 16336, 34, 572522496, 16352, 34, 572522496, 16352, 34, 572522496, 16352, 34, 572522496, 16352, 34, 572522496, 16352, 34, 572522496, 20160, 0, 8192, 20176, 0, 8192, 20192, 0, 8192, 20864, 2, 536870912, 20864, 2, 536870912, 20880, 2, 536870912, 20880, 2, 536870912, 
20896, 2, 536870912, 20896, 2, 536870912, 22080, 1145324612, 1145324612, 22080, 1145324612, 1145324612, 22080, 1145324612, 1145324612, 22080, 1145324612, 1145324612, 22080, 1145324612, 1145324612, 22080, 1145324612, 1145324612, 22080, 1145324612, 1145324612, 22080, 1145324612, 1145324612, 22080, 1145324612, 1145324612, 22080, 1145324612, 1145324612, 22080, 1145324612, 1145324612, 22080, 1145324612, 1145324612, 22080, 1145324612, 1145324612, 22080, 1145324612, 1145324612, 22080, 1145324612, 1145324612, 22080, 1145324612, 1145324612, 22992, 262144, 0, 23008, 262144, 0, 23024, 262144, 0, 26368, 559240, 0, 26368, 559240, 0, 26368, 559240, 0, 26368, 559240, 0, 26368, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438204110003048_261_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438204110003048_261_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3a9ab39f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438204110003048_261_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,311 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 26)) { + if ((WaveGetLaneIndex() < 32)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 22)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 18)) { + if ((WaveGetLaneIndex() == 63)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 55))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 60)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 42)) { + if ((WaveGetLaneIndex() >= 58)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 26)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 51))) { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 45)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
else { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 540 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2048, 0, 512, 2064, 0, 512, 2080, 0, 512, 2688, 35791394, 0, 2688, 35791394, 0, 2688, 35791394, 0, 2688, 35791394, 0, 2688, 35791394, 0, 2688, 35791394, 0, 2688, 35791394, 0, 2704, 35791394, 0, 2704, 35791394, 0, 2704, 35791394, 0, 2704, 35791394, 0, 2704, 35791394, 0, 2704, 35791394, 0, 2704, 35791394, 0, 2720, 35791394, 0, 2720, 35791394, 0, 2720, 35791394, 0, 2720, 35791394, 0, 2720, 35791394, 0, 2720, 35791394, 0, 2720, 35791394, 0, 3136, 2236962, 0, 3136, 2236962, 0, 3136, 2236962, 0, 3136, 2236962, 0, 3136, 2236962, 0, 3136, 2236962, 0, 3152, 2236962, 0, 3152, 2236962, 0, 3152, 2236962, 0, 3152, 2236962, 0, 3152, 
2236962, 0, 3152, 2236962, 0, 3168, 2236962, 0, 3168, 2236962, 0, 3168, 2236962, 0, 3168, 2236962, 0, 3168, 2236962, 0, 3168, 2236962, 0, 3648, 1145324612, 1145324612, 3648, 1145324612, 1145324612, 3648, 1145324612, 1145324612, 3648, 1145324612, 1145324612, 3648, 1145324612, 1145324612, 3648, 1145324612, 1145324612, 3648, 1145324612, 1145324612, 3648, 1145324612, 1145324612, 3648, 1145324612, 1145324612, 3648, 1145324612, 1145324612, 3648, 1145324612, 1145324612, 3648, 1145324612, 1145324612, 3648, 1145324612, 1145324612, 3648, 1145324612, 1145324612, 3648, 1145324612, 1145324612, 3648, 1145324612, 1145324612, 7424, 136, 2290647040, 7424, 136, 2290647040, 7424, 136, 2290647040, 7424, 136, 2290647040, 7424, 136, 2290647040, 7424, 136, 2290647040, 7424, 136, 2290647040, 8064, 8, 0, 8960, 8390656, 134250504, 8960, 8390656, 134250504, 8960, 8390656, 134250504, 8960, 8390656, 134250504, 8960, 8390656, 134250504, 9792, 0, 2290647040, 9792, 0, 2290647040, 9792, 0, 2290647040, 9792, 0, 2290647040, 9792, 0, 2290647040, 10432, 0, 4227858432, 10432, 0, 4227858432, 10432, 0, 4227858432, 10432, 0, 4227858432, 10432, 0, 4227858432, 10432, 0, 4227858432, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 10880, 67108863, 0, 12352, 1365, 0, 12352, 1365, 0, 12352, 1365, 0, 12352, 1365, 0, 12352, 1365, 0, 12352, 1365, 0, 13184, 21845, 0, 13184, 21845, 0, 13184, 21845, 0, 13184, 21845, 0, 13184, 21845, 0, 13184, 21845, 0, 13184, 21845, 0, 13184, 21845, 0, 13760, 1431655765, 341, 13760, 1431655765, 341, 13760, 1431655765, 341, 13760, 
1431655765, 341, 13760, 1431655765, 341, 13760, 1431655765, 341, 13760, 1431655765, 341, 13760, 1431655765, 341, 13760, 1431655765, 341, 13760, 1431655765, 341, 13760, 1431655765, 341, 13760, 1431655765, 341, 13760, 1431655765, 341, 13760, 1431655765, 341, 13760, 1431655765, 341, 13760, 1431655765, 341, 13760, 1431655765, 341, 13760, 1431655765, 341, 13760, 1431655765, 341, 13760, 1431655765, 341, 13760, 1431655765, 341, 16448, 0, 4294959104, 16448, 0, 4294959104, 16448, 0, 4294959104, 16448, 0, 4294959104, 16448, 0, 4294959104, 16448, 0, 4294959104, 16448, 0, 4294959104, 16448, 0, 4294959104, 16448, 0, 4294959104, 16448, 0, 4294959104, 16448, 0, 4294959104, 16448, 0, 4294959104, 16448, 0, 4294959104, 16448, 0, 4294959104, 16448, 0, 4294959104, 16448, 0, 4294959104, 16448, 0, 4294959104, 16448, 0, 4294959104, 16448, 0, 4294959104, 16064, 2147483648, 0, 15680, 4095, 7168, 15680, 4095, 7168, 15680, 4095, 7168, 15680, 4095, 7168, 15680, 4095, 7168, 15680, 4095, 7168, 15680, 4095, 7168, 15680, 4095, 7168, 15680, 4095, 7168, 15680, 4095, 7168, 15680, 4095, 7168, 15680, 4095, 7168, 15680, 4095, 7168, 15680, 4095, 7168, 15680, 4095, 7168] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438228540518148_262_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438228540518148_262_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..942ad80a --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438228540518148_262_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,293 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 53))) { + if (((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMin(6)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((75 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((93 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 53))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 61))) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 39)) || 
(WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((279 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((294 << 6) | (i2 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((313 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((322 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (341 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 312 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1728, 1073741824, 0, 1744, 1073741824, 0, 3652, 262400, 0, 3652, 262400, 0, 3656, 262400, 0, 3656, 262400, 0, 3668, 262400, 0, 3668, 262400, 0, 3672, 262400, 0, 3672, 262400, 0, 5956, 16, 0, 5960, 16, 0, 5972, 16, 0, 5976, 16, 0, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 
2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20608, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20624, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 
20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610, 20640, 2863311530, 2861181610] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438271599162146_263_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438271599162146_263_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d731c361 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438271599162146_263_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,442 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 48))) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 51)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 28))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 42)) { + if ((WaveGetLaneIndex() < 21)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 52)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((279 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((294 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 44))) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((309 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if ((WaveGetLaneIndex() == 25)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 53))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (349 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 57)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (356 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (366 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (375 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 3: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((405 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 231 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 2, 0, 2688, 2, 0, 2704, 2, 0, 3776, 17, 0, 3776, 17, 0, 7680, 4456448, 0, 7680, 4456448, 0, 7696, 4456448, 0, 7696, 4456448, 0, 7712, 4456448, 0, 7712, 4456448, 0, 8512, 559240, 0, 8512, 559240, 0, 8512, 559240, 0, 8512, 559240, 0, 8512, 559240, 0, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 
2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 9472, 2097149, 4293918720, 10112, 17, 0, 10112, 17, 0, 11328, 64, 0, 13312, 0, 67125248, 13312, 0, 67125248, 14080, 0, 67108864, 14400, 67125252, 1074004032, 14400, 67125252, 1074004032, 14400, 67125252, 1074004032, 14400, 67125252, 1074004032, 14400, 67125252, 1074004032, 14400, 67125252, 1074004032, 14848, 559240, 0, 14848, 559240, 0, 14848, 559240, 0, 14848, 559240, 0, 14848, 559240, 0, 15488, 17, 0, 15488, 17, 0, 19776, 0, 512, 19792, 0, 512, 23424, 68, 0, 23424, 68, 0, 25936, 8388608, 524288, 25936, 8388608, 524288, 25952, 8388608, 524288, 25952, 8388608, 524288, 25968, 8388608, 524288, 25968, 8388608, 524288] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438405020657911_266_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438405020657911_266_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..645eef41 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438405020657911_266_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,144 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 54))) { + if (((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 41))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 32)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + continue; + } + } + } + if (((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + 
Format: UInt32 + Stride: 4 + Fill: 0 + Size: 159 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2512, 32768, 0, 2528, 32768, 0, 3472, 0, 4198400, 3472, 0, 4198400, 3476, 0, 4198400, 3476, 0, 4198400, 3480, 0, 4198400, 3480, 0, 4198400, 3488, 0, 4198400, 3488, 0, 4198400, 3492, 0, 4198400, 3492, 0, 4198400, 3496, 0, 4198400, 3496, 0, 4198400, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6976, 1431655765, 1431655765, 6720, 2, 2862612480, 6720, 2, 2862612480, 6720, 2, 2862612480, 6720, 2, 2862612480, 6720, 2, 2862612480, 6720, 2, 2862612480, 6720, 2, 2862612480] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438405886246607_267_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438405886246607_267_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..288bdb4b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438405886246607_267_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,322 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 54))) { + if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 49)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((54 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 
6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 63)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 14)) { + if ((WaveGetLaneIndex() >= 50)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 42))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((249 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 45)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 111 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5376, 256, 0, 6016, 17, 0, 6016, 17, 0, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 
9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9472, 838860, 0, 9472, 838860, 0, 9472, 838860, 0, 9472, 838860, 0, 9472, 838860, 0, 9472, 838860, 0, 9472, 838860, 0, 9472, 838860, 0, 9472, 838860, 0, 9472, 838860, 0, 11008, 17, 0, 11008, 17, 0, 16448, 1092, 0, 16448, 1092, 0, 16448, 1092, 0, 16896, 2184, 0, 16896, 2184, 0, 16896, 2184, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438511667513006_269_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438511667513006_269_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e1a4a49d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438511667513006_269_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,346 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 16)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 
<< 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i0 << 4)); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 46))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 
<< 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 57)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((207 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 57)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((214 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + if ((counter3 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 54))) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 51))) { 
+ result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 47))) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 39)) { + if ((WaveGetLaneIndex() < 24)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (282 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (292 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (301 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (313 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + if ((((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (332 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 474 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2688, 1, 4194304, 2688, 1, 4194304, 2704, 1, 4194304, 2704, 1, 4194304, 3904, 136348168, 2181570690, 3904, 136348168, 2181570690, 3904, 136348168, 2181570690, 3904, 136348168, 2181570690, 3904, 136348168, 2181570690, 3904, 136348168, 2181570690, 3904, 136348168, 2181570690, 3904, 136348168, 2181570690, 3904, 136348168, 2181570690, 3904, 136348168, 2181570690, 3904, 136348168, 2181570690, 3908, 136348168, 2181570690, 3908, 136348168, 2181570690, 3908, 136348168, 2181570690, 3908, 136348168, 2181570690, 3908, 136348168, 2181570690, 3908, 136348168, 2181570690, 3908, 136348168, 2181570690, 3908, 136348168, 2181570690, 3908, 
136348168, 2181570690, 3908, 136348168, 2181570690, 3908, 136348168, 2181570690, 3920, 136348168, 2181570690, 3920, 136348168, 2181570690, 3920, 136348168, 2181570690, 3920, 136348168, 2181570690, 3920, 136348168, 2181570690, 3920, 136348168, 2181570690, 3920, 136348168, 2181570690, 3920, 136348168, 2181570690, 3920, 136348168, 2181570690, 3920, 136348168, 2181570690, 3920, 136348168, 2181570690, 3924, 136348168, 2181570690, 3924, 136348168, 2181570690, 3924, 136348168, 2181570690, 3924, 136348168, 2181570690, 3924, 136348168, 2181570690, 3924, 136348168, 2181570690, 3924, 136348168, 2181570690, 3924, 136348168, 2181570690, 3924, 136348168, 2181570690, 3924, 136348168, 2181570690, 3924, 136348168, 2181570690, 4480, 1090785345, 272696336, 4480, 1090785345, 272696336, 4480, 1090785345, 272696336, 4480, 1090785345, 272696336, 4480, 1090785345, 272696336, 4480, 1090785345, 272696336, 4480, 1090785345, 272696336, 4480, 1090785345, 272696336, 4480, 1090785345, 272696336, 4480, 1090785345, 272696336, 4480, 1090785345, 272696336, 4484, 1090785345, 272696336, 4484, 1090785345, 272696336, 4484, 1090785345, 272696336, 4484, 1090785345, 272696336, 4484, 1090785345, 272696336, 4484, 1090785345, 272696336, 4484, 1090785345, 272696336, 4484, 1090785345, 272696336, 4484, 1090785345, 272696336, 4484, 1090785345, 272696336, 4484, 1090785345, 272696336, 4496, 1090785345, 272696336, 4496, 1090785345, 272696336, 4496, 1090785345, 272696336, 4496, 1090785345, 272696336, 4496, 1090785345, 272696336, 4496, 1090785345, 272696336, 4496, 1090785345, 272696336, 4496, 1090785345, 272696336, 4496, 1090785345, 272696336, 4496, 1090785345, 272696336, 4496, 1090785345, 272696336, 4500, 1090785345, 272696336, 4500, 1090785345, 272696336, 4500, 1090785345, 272696336, 4500, 1090785345, 272696336, 4500, 1090785345, 272696336, 4500, 1090785345, 272696336, 4500, 1090785345, 272696336, 4500, 1090785345, 272696336, 4500, 1090785345, 272696336, 4500, 1090785345, 272696336, 4500, 1090785345, 272696336, 
5952, 262144, 532480, 5952, 262144, 532480, 5952, 262144, 532480, 5968, 262144, 532480, 5968, 262144, 532480, 5968, 262144, 532480, 6528, 136348168, 2181570690, 6528, 136348168, 2181570690, 6528, 136348168, 2181570690, 6528, 136348168, 2181570690, 6528, 136348168, 2181570690, 6528, 136348168, 2181570690, 6528, 136348168, 2181570690, 6528, 136348168, 2181570690, 6528, 136348168, 2181570690, 6528, 136348168, 2181570690, 6528, 136348168, 2181570690, 6544, 136348168, 2181570690, 6544, 136348168, 2181570690, 6544, 136348168, 2181570690, 6544, 136348168, 2181570690, 6544, 136348168, 2181570690, 6544, 136348168, 2181570690, 6544, 136348168, 2181570690, 6544, 136348168, 2181570690, 6544, 136348168, 2181570690, 6544, 136348168, 2181570690, 6544, 136348168, 2181570690, 7296, 272696336, 68174084, 7296, 272696336, 68174084, 7296, 272696336, 68174084, 7296, 272696336, 68174084, 7296, 272696336, 68174084, 7296, 272696336, 68174084, 7296, 272696336, 68174084, 7296, 272696336, 68174084, 7296, 272696336, 68174084, 7296, 272696336, 68174084, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 7616, 613566756, 1227133513, 10432, 268435456, 0, 13268, 0, 33554432, 13716, 0, 33554432, 17408, 2, 4194304, 17408, 2, 4194304, 19584, 0, 4194304, 21248, 0, 1] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438577186223001_270_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438577186223001_270_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fafe4f54 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438577186223001_270_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,197 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 37)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 55)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i2 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 16))) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 37)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + } + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 49)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 426 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1472, 0, 32, 1216, 2112, 8, 1216, 2112, 8, 1216, 2112, 8, 2112, 73, 0, 2112, 73, 0, 2112, 73, 0, 2688, 272696336, 68174084, 2688, 272696336, 68174084, 2688, 272696336, 68174084, 2688, 272696336, 68174084, 2688, 272696336, 68174084, 2688, 272696336, 68174084, 2688, 272696336, 68174084, 2688, 272696336, 68174084, 2688, 272696336, 68174084, 2688, 272696336, 68174084, 3600, 0, 1224736768, 3600, 0, 1224736768, 3600, 0, 1224736768, 3616, 0, 1224736768, 3616, 0, 1224736768, 3616, 0, 1224736768, 5072, 0, 8, 5076, 0, 8, 5080, 0, 8, 5088, 0, 8, 5092, 0, 8, 5096, 0, 8, 5648, 613566756, 0, 5648, 613566756, 0, 5648, 613566756, 0, 5648, 613566756, 0, 5648, 613566756, 0, 5648, 613566756, 0, 5648, 613566756, 0, 5648, 613566756, 0, 5648, 613566756, 0, 5648, 613566756, 0, 5664, 613566756, 0, 5664, 613566756, 0, 5664, 613566756, 0, 5664, 613566756, 0, 5664, 613566756, 0, 5664, 613566756, 0, 5664, 613566756, 0, 5664, 613566756, 0, 5664, 613566756, 0, 5664, 613566756, 0, 6784, 65535, 0, 6784, 65535, 0, 6784, 65535, 0, 6784, 65535, 0, 6784, 65535, 0, 6784, 65535, 0, 6784, 65535, 0, 6784, 65535, 0, 6784, 65535, 0, 6784, 65535, 0, 6784, 65535, 0, 6784, 65535, 0, 6784, 65535, 0, 6784, 65535, 0, 6784, 65535, 0, 6784, 65535, 0, 6800, 65535, 0, 6800, 65535, 0, 6800, 65535, 0, 6800, 65535, 0, 6800, 65535, 0, 6800, 65535, 0, 6800, 65535, 0, 6800, 65535, 0, 6800, 65535, 0, 6800, 65535, 0, 6800, 65535, 0, 6800, 65535, 0, 6800, 65535, 0, 6800, 65535, 0, 6800, 65535, 0, 6800, 65535, 0, 6816, 65535, 0, 6816, 65535, 0, 6816, 65535, 0, 6816, 65535, 0, 6816, 65535, 0, 6816, 65535, 0, 6816, 65535, 0, 6816, 65535, 0, 6816, 65535, 0, 6816, 65535, 0, 6816, 65535, 0, 6816, 65535, 0, 6816, 65535, 0, 6816, 65535, 0, 6816, 65535, 0, 6816, 65535, 0, 11584, 0, 4294836224, 11584, 0, 4294836224, 11584, 0, 4294836224, 11584, 0, 4294836224, 11584, 0, 4294836224, 11584, 0, 
4294836224, 11584, 0, 4294836224, 11584, 0, 4294836224, 11584, 0, 4294836224, 11584, 0, 4294836224, 11584, 0, 4294836224, 11584, 0, 4294836224, 11584, 0, 4294836224, 11584, 0, 4294836224, 11584, 0, 4294836224, 11600, 0, 4294836224, 11600, 0, 4294836224, 11600, 0, 4294836224, 11600, 0, 4294836224, 11600, 0, 4294836224, 11600, 0, 4294836224, 11600, 0, 4294836224, 11600, 0, 4294836224, 11600, 0, 4294836224, 11600, 0, 4294836224, 11600, 0, 4294836224, 11600, 0, 4294836224, 11600, 0, 4294836224, 11600, 0, 4294836224, 11600, 0, 4294836224, 11616, 0, 4294836224, 11616, 0, 4294836224, 11616, 0, 4294836224, 11616, 0, 4294836224, 11616, 0, 4294836224, 11616, 0, 4294836224, 11616, 0, 4294836224, 11616, 0, 4294836224, 11616, 0, 4294836224, 11616, 0, 4294836224, 11616, 0, 4294836224, 11616, 0, 4294836224, 11616, 0, 4294836224, 11616, 0, 4294836224, 11616, 0, 4294836224] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438602333094501_271_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438602333094501_271_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5350b75c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438602333094501_271_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,180 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 60))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 
2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((94 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 324 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 73, 0, 768, 73, 0, 768, 73, 0, 6020, 272696336, 68174084, 6020, 272696336, 68174084, 6020, 272696336, 68174084, 6020, 272696336, 68174084, 6020, 272696336, 68174084, 6020, 272696336, 68174084, 6020, 272696336, 68174084, 6020, 272696336, 68174084, 6020, 272696336, 68174084, 6020, 272696336, 68174084, 6024, 272696336, 68174084, 6024, 272696336, 68174084, 6024, 272696336, 68174084, 6024, 272696336, 68174084, 6024, 272696336, 68174084, 6024, 272696336, 68174084, 6024, 272696336, 68174084, 6024, 272696336, 68174084, 6024, 272696336, 68174084, 6024, 272696336, 68174084, 6036, 272696336, 68174084, 6036, 272696336, 68174084, 6036, 272696336, 68174084, 6036, 272696336, 68174084, 6036, 272696336, 68174084, 6036, 272696336, 68174084, 6036, 272696336, 68174084, 6036, 272696336, 68174084, 6036, 272696336, 68174084, 6036, 272696336, 68174084, 6040, 272696336, 68174084, 6040, 272696336, 68174084, 6040, 272696336, 68174084, 6040, 272696336, 68174084, 6040, 272696336, 68174084, 6040, 272696336, 68174084, 6040, 272696336, 68174084, 6040, 272696336, 68174084, 6040, 272696336, 
68174084, 6040, 272696336, 68174084, 6596, 2181570690, 545392672, 6596, 2181570690, 545392672, 6596, 2181570690, 545392672, 6596, 2181570690, 545392672, 6596, 2181570690, 545392672, 6596, 2181570690, 545392672, 6596, 2181570690, 545392672, 6596, 2181570690, 545392672, 6596, 2181570690, 545392672, 6596, 2181570690, 545392672, 6596, 2181570690, 545392672, 6600, 2181570690, 545392672, 6600, 2181570690, 545392672, 6600, 2181570690, 545392672, 6600, 2181570690, 545392672, 6600, 2181570690, 545392672, 6600, 2181570690, 545392672, 6600, 2181570690, 545392672, 6600, 2181570690, 545392672, 6600, 2181570690, 545392672, 6600, 2181570690, 545392672, 6600, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6616, 2181570690, 545392672, 6616, 2181570690, 545392672, 6616, 2181570690, 545392672, 6616, 2181570690, 545392672, 6616, 2181570690, 545392672, 6616, 2181570690, 545392672, 6616, 2181570690, 545392672, 6616, 2181570690, 545392672, 6616, 2181570690, 545392672, 6616, 2181570690, 545392672, 6616, 2181570690, 545392672, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + 
- Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438605139033733_272_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438605139033733_272_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..07b815cb --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438605139033733_272_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,144 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 
= (counter0 + 1); + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 52))) { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((78 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((91 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
+ if ((WaveGetLaneIndex() >= 40)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 342 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1728, 67112960, 8192, 1728, 67112960, 8192, 1728, 67112960, 8192, 1472, 16384, 1024, 1472, 16384, 1024, 2640, 511, 0, 2640, 511, 0, 2640, 511, 0, 2640, 511, 0, 2640, 511, 0, 2640, 511, 0, 2640, 511, 0, 2640, 511, 0, 2640, 511, 0, 3792, 15, 4160749568, 3792, 15, 4160749568, 3792, 15, 4160749568, 3792, 15, 4160749568, 3792, 15, 4160749568, 3792, 15, 4160749568, 3792, 15, 4160749568, 3792, 15, 4160749568, 3792, 15, 4160749568, 5008, 15, 4278190080, 5008, 15, 4278190080, 5008, 15, 4278190080, 5008, 15, 4278190080, 5008, 15, 4278190080, 5008, 15, 4278190080, 5008, 15, 4278190080, 5008, 15, 4278190080, 5008, 15, 4278190080, 5008, 15, 4278190080, 5008, 15, 4278190080, 5008, 15, 4278190080, 5012, 15, 4278190080, 5012, 15, 4278190080, 5012, 15, 4278190080, 5012, 15, 4278190080, 5012, 15, 4278190080, 5012, 15, 4278190080, 5012, 15, 4278190080, 5012, 15, 4278190080, 5012, 15, 4278190080, 5012, 15, 4278190080, 5012, 15, 4278190080, 5012, 15, 4278190080, 5840, 15, 4293918720, 5840, 15, 4293918720, 5840, 15, 4293918720, 5840, 15, 4293918720, 5840, 15, 4293918720, 5840, 15, 4293918720, 5840, 15, 4293918720, 5840, 15, 4293918720, 5840, 15, 4293918720, 5840, 15, 4293918720, 5840, 15, 4293918720, 5840, 15, 4293918720, 5840, 15, 4293918720, 5840, 15, 4293918720, 5840, 15, 4293918720, 5840, 15, 4293918720, 5844, 15, 4293918720, 5844, 
15, 4293918720, 5844, 15, 4293918720, 5844, 15, 4293918720, 5844, 15, 4293918720, 5844, 15, 4293918720, 5844, 15, 4293918720, 5844, 15, 4293918720, 5844, 15, 4293918720, 5844, 15, 4293918720, 5844, 15, 4293918720, 5844, 15, 4293918720, 5844, 15, 4293918720, 5844, 15, 4293918720, 5844, 15, 4293918720, 5844, 15, 4293918720, 6736, 15, 4261412864, 6736, 15, 4261412864, 6736, 15, 4261412864, 6736, 15, 4261412864, 6736, 15, 4261412864, 6736, 15, 4261412864, 6736, 15, 4261412864, 6736, 15, 4261412864, 6736, 15, 4261412864, 6736, 15, 4261412864, 6736, 15, 4261412864, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040, 7184, 0, 4294967040] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438652652429596_275_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438652652429596_275_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..de7e8192 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438652652429596_275_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,166 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 45)) 
{ + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 57)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 38)) { + if ((WaveGetLaneIndex() == 40)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 52))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
} else { + if ((WaveGetLaneIndex() == 33)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 330 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 4096, 63, 4294934528, 3712, 73728, 16, 3712, 73728, 16, 3712, 73728, 16, 3328, 4096, 0, 2944, 0, 8192, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 
1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5328, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 5344, 1431655765, 1431655765, 7760, 2, 2852126720, 7760, 2, 2852126720, 7760, 2, 2852126720, 7760, 2, 2852126720, 7760, 2, 2852126720, 7776, 2, 2852126720, 7776, 2, 2852126720, 7776, 2, 2852126720, 7776, 2, 2852126720, 7776, 2, 2852126720, 7792, 2, 2852126720, 7792, 2, 2852126720, 7792, 2, 2852126720, 7792, 2, 2852126720, 7792, 2, 2852126720, 8208, 0, 2, 8224, 0, 2, 8240, 0, 2] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + 
VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438667777068543_276_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438667777068543_276_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cd728683 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438667777068543_276_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,307 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 53)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 22))) { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 61)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((134 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 3: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 21)) { + if ((WaveGetLaneIndex() < 25)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((264 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((279 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 528 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1536, 0, 572522496, 1536, 0, 572522496, 1536, 0, 572522496, 1552, 0, 572522496, 1552, 0, 572522496, 1552, 0, 572522496, 1568, 0, 572522496, 1568, 0, 572522496, 1568, 0, 572522496, 5760, 0, 536870912, 5776, 0, 536870912, 5792, 0, 536870912, 6592, 64, 0, 7760, 1024, 67125248, 7760, 1024, 67125248, 7760, 1024, 67125248, 7776, 1024, 67125248, 7776, 1024, 67125248, 7776, 1024, 67125248, 8592, 0, 67125248, 8592, 0, 67125248, 8608, 0, 67125248, 8608, 0, 67125248, 8896, 67125252, 1074004032, 8896, 67125252, 1074004032, 8896, 67125252, 1074004032, 8896, 67125252, 1074004032, 8896, 67125252, 1074004032, 8896, 67125252, 1074004032, 10048, 838860, 0, 10048, 838860, 0, 10048, 838860, 0, 10048, 838860, 0, 10048, 838860, 0, 10048, 838860, 0, 10048, 838860, 0, 10048, 838860, 0, 10048, 838860, 0, 10048, 838860, 0, 10064, 838860, 0, 10064, 838860, 0, 10064, 838860, 0, 10064, 838860, 0, 10064, 838860, 0, 10064, 838860, 0, 10064, 
838860, 0, 10064, 838860, 0, 10064, 838860, 0, 10064, 838860, 0, 11008, 0, 4194304, 11024, 0, 4194304, 11840, 73, 0, 11840, 73, 0, 11840, 73, 0, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 12416, 1363481681, 340870420, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14528, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 
2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 14544, 2863311530, 2863311530, 17856, 32768, 0, 17860, 32768, 0, 17872, 32768, 0, 17876, 32768, 0, 18432, 272696336, 68174084, 18432, 272696336, 68174084, 18432, 272696336, 68174084, 18432, 272696336, 68174084, 18432, 272696336, 68174084, 18432, 272696336, 68174084, 18432, 272696336, 68174084, 18432, 272696336, 68174084, 18432, 272696336, 68174084, 18432, 272696336, 68174084, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513, 18752, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + 
- Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438714730856443_277_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438714730856443_277_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..92899037 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438714730856443_277_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 41)) { + if ((WaveGetLaneIndex() < 18)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 2064, 1073741826, 2112, 2064, 1073741826, 2112, 2064, 1073741826, 2112, 2064, 1073741826, 1856, 33554464, 524288, 1856, 33554464, 524288, 1856, 33554464, 524288] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438715543793522_278_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438715543793522_278_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7d3b101b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438715543793522_278_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,267 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 58)) { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 38)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 26)) { + 
result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 34)) { + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 60)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 2368, 0, 64, 2384, 0, 64, 2816, 67108864, 0, 2832, 67108864, 0, 7680, 85, 0, 7680, 85, 0, 7680, 85, 0, 7680, 85, 0, 9536, 0, 2863311488, 9536, 0, 2863311488, 9536, 0, 2863311488, 9536, 0, 2863311488, 9536, 0, 2863311488, 9536, 0, 2863311488, 9536, 0, 2863311488, 9536, 0, 2863311488, 9536, 0, 2863311488, 9536, 0, 2863311488, 9536, 0, 2863311488, 9536, 0, 2863311488, 9536, 0, 2863311488, 9552, 0, 2863311488, 9552, 0, 2863311488, 9552, 0, 2863311488, 9552, 0, 2863311488, 
9552, 0, 2863311488, 9552, 0, 2863311488, 9552, 0, 2863311488, 9552, 0, 2863311488, 9552, 0, 2863311488, 9552, 0, 2863311488, 9552, 0, 2863311488, 9552, 0, 2863311488, 9552, 0, 2863311488, 10112, 0, 2684354560, 10112, 0, 2684354560, 10128, 0, 2684354560, 10128, 0, 2684354560, 10816, 174762, 0, 10816, 174762, 0, 10816, 174762, 0, 10816, 174762, 0, 10816, 174762, 0, 10816, 174762, 0, 10816, 174762, 0, 10816, 174762, 0, 10816, 174762, 0, 10832, 174762, 0, 10832, 174762, 0, 10832, 174762, 0, 10832, 174762, 0, 10832, 174762, 0, 10832, 174762, 0, 10832, 174762, 0, 10832, 174762, 0, 10832, 174762, 0, 11776, 524288, 2048, 11776, 524288, 2048, 11792, 524288, 2048, 11792, 524288, 2048, 12416, 85, 0, 12416, 85, 0, 12416, 85, 0, 12416, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438727784398860_279_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438727784398860_279_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2de05323 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438727784398860_279_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,130 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((32 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1566 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 
2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 
2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 1008, 2863311530, 2863311530, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2064, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 
1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2068, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2080, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 1431655765, 1431655765, 2084, 
1431655765, 1431655765, 2084, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2096, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2100, 1431655765, 1431655765, 2704, 17, 0, 2704, 17, 0, 
2708, 17, 0, 2708, 17, 0, 2720, 17, 0, 2720, 17, 0, 2724, 17, 0, 2724, 17, 0, 2736, 17, 0, 2736, 17, 0, 2740, 17, 0, 2740, 17, 0, 3280, 286331153, 286331153, 3280, 286331153, 286331153, 3280, 286331153, 286331153, 3280, 286331153, 286331153, 3280, 286331153, 286331153, 3280, 286331153, 286331153, 3280, 286331153, 286331153, 3280, 286331153, 286331153, 3280, 286331153, 286331153, 3280, 286331153, 286331153, 3280, 286331153, 286331153, 3280, 286331153, 286331153, 3280, 286331153, 286331153, 3280, 286331153, 286331153, 3280, 286331153, 286331153, 3280, 286331153, 286331153, 3284, 286331153, 286331153, 3284, 286331153, 286331153, 3284, 286331153, 286331153, 3284, 286331153, 286331153, 3284, 286331153, 286331153, 3284, 286331153, 286331153, 3284, 286331153, 286331153, 3284, 286331153, 286331153, 3284, 286331153, 286331153, 3284, 286331153, 286331153, 3284, 286331153, 286331153, 3284, 286331153, 286331153, 3284, 286331153, 286331153, 3284, 286331153, 286331153, 3284, 286331153, 286331153, 3284, 286331153, 286331153, 3296, 286331153, 286331153, 3296, 286331153, 286331153, 3296, 286331153, 286331153, 3296, 286331153, 286331153, 3296, 286331153, 286331153, 3296, 286331153, 286331153, 3296, 286331153, 286331153, 3296, 286331153, 286331153, 3296, 286331153, 286331153, 3296, 286331153, 286331153, 3296, 286331153, 286331153, 3296, 286331153, 286331153, 3296, 286331153, 286331153, 3296, 286331153, 286331153, 3296, 286331153, 286331153, 3296, 286331153, 286331153, 3300, 286331153, 286331153, 3300, 286331153, 286331153, 3300, 286331153, 286331153, 3300, 286331153, 286331153, 3300, 286331153, 286331153, 3300, 286331153, 286331153, 3300, 286331153, 286331153, 3300, 286331153, 286331153, 3300, 286331153, 286331153, 3300, 286331153, 286331153, 3300, 286331153, 286331153, 3300, 286331153, 286331153, 3300, 286331153, 286331153, 3300, 286331153, 286331153, 3300, 286331153, 286331153, 3300, 286331153, 286331153, 3312, 286331153, 286331153, 3312, 286331153, 286331153, 3312, 286331153, 
286331153, 3312, 286331153, 286331153, 3312, 286331153, 286331153, 3312, 286331153, 286331153, 3312, 286331153, 286331153, 3312, 286331153, 286331153, 3312, 286331153, 286331153, 3312, 286331153, 286331153, 3312, 286331153, 286331153, 3312, 286331153, 286331153, 3312, 286331153, 286331153, 3312, 286331153, 286331153, 3312, 286331153, 286331153, 3312, 286331153, 286331153, 3316, 286331153, 286331153, 3316, 286331153, 286331153, 3316, 286331153, 286331153, 3316, 286331153, 286331153, 3316, 286331153, 286331153, 3316, 286331153, 286331153, 3316, 286331153, 286331153, 3316, 286331153, 286331153, 3316, 286331153, 286331153, 3316, 286331153, 286331153, 3316, 286331153, 286331153, 3316, 286331153, 286331153, 3316, 286331153, 286331153, 3316, 286331153, 286331153, 3316, 286331153, 286331153, 3316, 286331153, 286331153, 3600, 1145324612, 1145324612, 3600, 1145324612, 1145324612, 3600, 1145324612, 1145324612, 3600, 1145324612, 1145324612, 3600, 1145324612, 1145324612, 3600, 1145324612, 1145324612, 3600, 1145324612, 1145324612, 3600, 1145324612, 1145324612, 3600, 1145324612, 1145324612, 3600, 1145324612, 1145324612, 3600, 1145324612, 1145324612, 3600, 1145324612, 1145324612, 3600, 1145324612, 1145324612, 3600, 1145324612, 1145324612, 3600, 1145324612, 1145324612, 3600, 1145324612, 1145324612, 3604, 1145324612, 1145324612, 3604, 1145324612, 1145324612, 3604, 1145324612, 1145324612, 3604, 1145324612, 1145324612, 3604, 1145324612, 1145324612, 3604, 1145324612, 1145324612, 3604, 1145324612, 1145324612, 3604, 1145324612, 1145324612, 3604, 1145324612, 1145324612, 3604, 1145324612, 1145324612, 3604, 1145324612, 1145324612, 3604, 1145324612, 1145324612, 3604, 1145324612, 1145324612, 3604, 1145324612, 1145324612, 3604, 1145324612, 1145324612, 3604, 1145324612, 1145324612, 3616, 1145324612, 1145324612, 3616, 1145324612, 1145324612, 3616, 1145324612, 1145324612, 3616, 1145324612, 1145324612, 3616, 1145324612, 1145324612, 3616, 1145324612, 1145324612, 3616, 1145324612, 1145324612, 3616, 
1145324612, 1145324612, 3616, 1145324612, 1145324612, 3616, 1145324612, 1145324612, 3616, 1145324612, 1145324612, 3616, 1145324612, 1145324612, 3616, 1145324612, 1145324612, 3616, 1145324612, 1145324612, 3616, 1145324612, 1145324612, 3616, 1145324612, 1145324612, 3620, 1145324612, 1145324612, 3620, 1145324612, 1145324612, 3620, 1145324612, 1145324612, 3620, 1145324612, 1145324612, 3620, 1145324612, 1145324612, 3620, 1145324612, 1145324612, 3620, 1145324612, 1145324612, 3620, 1145324612, 1145324612, 3620, 1145324612, 1145324612, 3620, 1145324612, 1145324612, 3620, 1145324612, 1145324612, 3620, 1145324612, 1145324612, 3620, 1145324612, 1145324612, 3620, 1145324612, 1145324612, 3620, 1145324612, 1145324612, 3620, 1145324612, 1145324612, 3632, 1145324612, 1145324612, 3632, 1145324612, 1145324612, 3632, 1145324612, 1145324612, 3632, 1145324612, 1145324612, 3632, 1145324612, 1145324612, 3632, 1145324612, 1145324612, 3632, 1145324612, 1145324612, 3632, 1145324612, 1145324612, 3632, 1145324612, 1145324612, 3632, 1145324612, 1145324612, 3632, 1145324612, 1145324612, 3632, 1145324612, 1145324612, 3632, 1145324612, 1145324612, 3632, 1145324612, 1145324612, 3632, 1145324612, 1145324612, 3632, 1145324612, 1145324612, 3636, 1145324612, 1145324612, 3636, 1145324612, 1145324612, 3636, 1145324612, 1145324612, 3636, 1145324612, 1145324612, 3636, 1145324612, 1145324612, 3636, 1145324612, 1145324612, 3636, 1145324612, 1145324612, 3636, 1145324612, 1145324612, 3636, 1145324612, 1145324612, 3636, 1145324612, 1145324612, 3636, 1145324612, 1145324612, 3636, 1145324612, 1145324612, 3636, 1145324612, 1145324612, 3636, 1145324612, 1145324612, 3636, 1145324612, 1145324612, 3636, 1145324612, 1145324612, 4048, 559240, 0, 4048, 559240, 0, 4048, 559240, 0, 4048, 559240, 0, 4048, 559240, 0, 4052, 559240, 0, 4052, 559240, 0, 4052, 559240, 0, 4052, 559240, 0, 4052, 559240, 0, 4064, 559240, 0, 4064, 559240, 0, 4064, 559240, 0, 4064, 559240, 0, 4064, 559240, 0, 4068, 559240, 0, 4068, 559240, 0, 4068, 
559240, 0, 4068, 559240, 0, 4068, 559240, 0, 4080, 559240, 0, 4080, 559240, 0, 4080, 559240, 0, 4080, 559240, 0, 4080, 559240, 0, 4084, 559240, 0, 4084, 559240, 0, 4084, 559240, 0, 4084, 559240, 0, 4084, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438785191313600_281_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438785191313600_281_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fdbb8aee --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438785191313600_281_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,231 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((80 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 43))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 45))) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((98 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((107 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((116 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((counter1 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 60))) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + 
if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 40))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1227 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 1090785345, 272696336, 1216, 1090785345, 272696336, 1216, 1090785345, 272696336, 1216, 1090785345, 272696336, 1216, 1090785345, 272696336, 1216, 1090785345, 272696336, 1216, 1090785345, 272696336, 1216, 1090785345, 272696336, 1216, 1090785345, 272696336, 1216, 1090785345, 272696336, 1216, 1090785345, 272696336, 1232, 
1090785345, 272696336, 1232, 1090785345, 272696336, 1232, 1090785345, 272696336, 1232, 1090785345, 272696336, 1232, 1090785345, 272696336, 1232, 1090785345, 272696336, 1232, 1090785345, 272696336, 1232, 1090785345, 272696336, 1232, 1090785345, 272696336, 1232, 1090785345, 272696336, 1232, 1090785345, 272696336, 1248, 1090785345, 272696336, 1248, 1090785345, 272696336, 1248, 1090785345, 272696336, 1248, 1090785345, 272696336, 1248, 1090785345, 272696336, 1248, 1090785345, 272696336, 1248, 1090785345, 272696336, 1248, 1090785345, 272696336, 1248, 1090785345, 272696336, 1248, 1090785345, 272696336, 1248, 1090785345, 272696336, 1920, 1090785345, 272696336, 1920, 1090785345, 272696336, 1920, 1090785345, 272696336, 1920, 1090785345, 272696336, 1920, 1090785345, 272696336, 1920, 1090785345, 272696336, 1920, 1090785345, 272696336, 1920, 1090785345, 272696336, 1920, 1090785345, 272696336, 1920, 1090785345, 272696336, 1920, 1090785345, 272696336, 1936, 1090785345, 272696336, 1936, 1090785345, 272696336, 1936, 1090785345, 272696336, 1936, 1090785345, 272696336, 1936, 1090785345, 272696336, 1936, 1090785345, 272696336, 1936, 1090785345, 272696336, 1936, 1090785345, 272696336, 1936, 1090785345, 272696336, 1936, 1090785345, 272696336, 1936, 1090785345, 272696336, 1952, 1090785345, 272696336, 1952, 1090785345, 272696336, 1952, 1090785345, 272696336, 1952, 1090785345, 272696336, 1952, 1090785345, 272696336, 1952, 1090785345, 272696336, 1952, 1090785345, 272696336, 1952, 1090785345, 272696336, 1952, 1090785345, 272696336, 1952, 1090785345, 272696336, 1952, 1090785345, 272696336, 2496, 272696336, 68174084, 2496, 272696336, 68174084, 2496, 272696336, 68174084, 2496, 272696336, 68174084, 2496, 272696336, 68174084, 2496, 272696336, 68174084, 2496, 272696336, 68174084, 2496, 272696336, 68174084, 2496, 272696336, 68174084, 2496, 272696336, 68174084, 2816, 613566756, 1227133513, 2816, 613566756, 1227133513, 2816, 613566756, 1227133513, 2816, 613566756, 1227133513, 2816, 613566756, 
1227133513, 2816, 613566756, 1227133513, 2816, 613566756, 1227133513, 2816, 613566756, 1227133513, 2816, 613566756, 1227133513, 2816, 613566756, 1227133513, 2816, 613566756, 1227133513, 2816, 613566756, 1227133513, 2816, 613566756, 1227133513, 2816, 613566756, 1227133513, 2816, 613566756, 1227133513, 2816, 613566756, 1227133513, 2816, 613566756, 1227133513, 2816, 613566756, 1227133513, 2816, 613566756, 1227133513, 2816, 613566756, 1227133513, 2816, 613566756, 1227133513, 3920, 1365, 0, 3920, 1365, 0, 3920, 1365, 0, 3920, 1365, 0, 3920, 1365, 0, 3920, 1365, 0, 3936, 1365, 0, 3936, 1365, 0, 3936, 1365, 0, 3936, 1365, 0, 3936, 1365, 0, 3936, 1365, 0, 5136, 1365, 1430257664, 5136, 1365, 1430257664, 5136, 1365, 1430257664, 5136, 1365, 1430257664, 5136, 1365, 1430257664, 5136, 1365, 1430257664, 5136, 1365, 1430257664, 5136, 1365, 1430257664, 5136, 1365, 1430257664, 5136, 1365, 1430257664, 5136, 1365, 1430257664, 5140, 1365, 1430257664, 5140, 1365, 1430257664, 5140, 1365, 1430257664, 5140, 1365, 1430257664, 5140, 1365, 1430257664, 5140, 1365, 1430257664, 5140, 1365, 1430257664, 5140, 1365, 1430257664, 5140, 1365, 1430257664, 5140, 1365, 1430257664, 5140, 1365, 1430257664, 5144, 1365, 1430257664, 5144, 1365, 1430257664, 5144, 1365, 1430257664, 5144, 1365, 1430257664, 5144, 1365, 1430257664, 5144, 1365, 1430257664, 5144, 1365, 1430257664, 5144, 1365, 1430257664, 5144, 1365, 1430257664, 5144, 1365, 1430257664, 5144, 1365, 1430257664, 5152, 1365, 1430257664, 5152, 1365, 1430257664, 5152, 1365, 1430257664, 5152, 1365, 1430257664, 5152, 1365, 1430257664, 5152, 1365, 1430257664, 5152, 1365, 1430257664, 5152, 1365, 1430257664, 5152, 1365, 1430257664, 5152, 1365, 1430257664, 5152, 1365, 1430257664, 5156, 1365, 1430257664, 5156, 1365, 1430257664, 5156, 1365, 1430257664, 5156, 1365, 1430257664, 5156, 1365, 1430257664, 5156, 1365, 1430257664, 5156, 1365, 1430257664, 5156, 1365, 1430257664, 5156, 1365, 1430257664, 5156, 1365, 1430257664, 5156, 1365, 1430257664, 5160, 1365, 1430257664, 
5160, 1365, 1430257664, 5160, 1365, 1430257664, 5160, 1365, 1430257664, 5160, 1365, 1430257664, 5160, 1365, 1430257664, 5160, 1365, 1430257664, 5160, 1365, 1430257664, 5160, 1365, 1430257664, 5160, 1365, 1430257664, 5160, 1365, 1430257664, 6288, 85, 1431650304, 6288, 85, 1431650304, 6288, 85, 1431650304, 6288, 85, 1431650304, 6288, 85, 1431650304, 6288, 85, 1431650304, 6288, 85, 1431650304, 6288, 85, 1431650304, 6288, 85, 1431650304, 6288, 85, 1431650304, 6288, 85, 1431650304, 6288, 85, 1431650304, 6288, 85, 1431650304, 6292, 85, 1431650304, 6292, 85, 1431650304, 6292, 85, 1431650304, 6292, 85, 1431650304, 6292, 85, 1431650304, 6292, 85, 1431650304, 6292, 85, 1431650304, 6292, 85, 1431650304, 6292, 85, 1431650304, 6292, 85, 1431650304, 6292, 85, 1431650304, 6292, 85, 1431650304, 6292, 85, 1431650304, 6296, 85, 1431650304, 6296, 85, 1431650304, 6296, 85, 1431650304, 6296, 85, 1431650304, 6296, 85, 1431650304, 6296, 85, 1431650304, 6296, 85, 1431650304, 6296, 85, 1431650304, 6296, 85, 1431650304, 6296, 85, 1431650304, 6296, 85, 1431650304, 6296, 85, 1431650304, 6296, 85, 1431650304, 6304, 85, 1431650304, 6304, 85, 1431650304, 6304, 85, 1431650304, 6304, 85, 1431650304, 6304, 85, 1431650304, 6304, 85, 1431650304, 6304, 85, 1431650304, 6304, 85, 1431650304, 6304, 85, 1431650304, 6304, 85, 1431650304, 6304, 85, 1431650304, 6304, 85, 1431650304, 6304, 85, 1431650304, 6308, 85, 1431650304, 6308, 85, 1431650304, 6308, 85, 1431650304, 6308, 85, 1431650304, 6308, 85, 1431650304, 6308, 85, 1431650304, 6308, 85, 1431650304, 6308, 85, 1431650304, 6308, 85, 1431650304, 6308, 85, 1431650304, 6308, 85, 1431650304, 6308, 85, 1431650304, 6308, 85, 1431650304, 6312, 85, 1431650304, 6312, 85, 1431650304, 6312, 85, 1431650304, 6312, 85, 1431650304, 6312, 85, 1431650304, 6312, 85, 1431650304, 6312, 85, 1431650304, 6312, 85, 1431650304, 6312, 85, 1431650304, 6312, 85, 1431650304, 6312, 85, 1431650304, 6312, 85, 1431650304, 6312, 85, 1431650304, 6864, 1431568384, 1365, 6864, 1431568384, 
1365, 6864, 1431568384, 1365, 6864, 1431568384, 1365, 6864, 1431568384, 1365, 6864, 1431568384, 1365, 6864, 1431568384, 1365, 6864, 1431568384, 1365, 6864, 1431568384, 1365, 6864, 1431568384, 1365, 6864, 1431568384, 1365, 6864, 1431568384, 1365, 6864, 1431568384, 1365, 6868, 1431568384, 1365, 6868, 1431568384, 1365, 6868, 1431568384, 1365, 6868, 1431568384, 1365, 6868, 1431568384, 1365, 6868, 1431568384, 1365, 6868, 1431568384, 1365, 6868, 1431568384, 1365, 6868, 1431568384, 1365, 6868, 1431568384, 1365, 6868, 1431568384, 1365, 6868, 1431568384, 1365, 6868, 1431568384, 1365, 6872, 1431568384, 1365, 6872, 1431568384, 1365, 6872, 1431568384, 1365, 6872, 1431568384, 1365, 6872, 1431568384, 1365, 6872, 1431568384, 1365, 6872, 1431568384, 1365, 6872, 1431568384, 1365, 6872, 1431568384, 1365, 6872, 1431568384, 1365, 6872, 1431568384, 1365, 6872, 1431568384, 1365, 6872, 1431568384, 1365, 6880, 1431568384, 1365, 6880, 1431568384, 1365, 6880, 1431568384, 1365, 6880, 1431568384, 1365, 6880, 1431568384, 1365, 6880, 1431568384, 1365, 6880, 1431568384, 1365, 6880, 1431568384, 1365, 6880, 1431568384, 1365, 6880, 1431568384, 1365, 6880, 1431568384, 1365, 6880, 1431568384, 1365, 6880, 1431568384, 1365, 6884, 1431568384, 1365, 6884, 1431568384, 1365, 6884, 1431568384, 1365, 6884, 1431568384, 1365, 6884, 1431568384, 1365, 6884, 1431568384, 1365, 6884, 1431568384, 1365, 6884, 1431568384, 1365, 6884, 1431568384, 1365, 6884, 1431568384, 1365, 6884, 1431568384, 1365, 6884, 1431568384, 1365, 6884, 1431568384, 1365, 6888, 1431568384, 1365, 6888, 1431568384, 1365, 6888, 1431568384, 1365, 6888, 1431568384, 1365, 6888, 1431568384, 1365, 6888, 1431568384, 1365, 6888, 1431568384, 1365, 6888, 1431568384, 1365, 6888, 1431568384, 1365, 6888, 1431568384, 1365, 6888, 1431568384, 1365, 6888, 1431568384, 1365, 6888, 1431568384, 1365, 7440, 1431568384, 1365, 7440, 1431568384, 1365, 7440, 1431568384, 1365, 7440, 1431568384, 1365, 7440, 1431568384, 1365, 7440, 1431568384, 1365, 7440, 1431568384, 1365, 
7440, 1431568384, 1365, 7440, 1431568384, 1365, 7440, 1431568384, 1365, 7440, 1431568384, 1365, 7440, 1431568384, 1365, 7440, 1431568384, 1365, 7444, 1431568384, 1365, 7444, 1431568384, 1365, 7444, 1431568384, 1365, 7444, 1431568384, 1365, 7444, 1431568384, 1365, 7444, 1431568384, 1365, 7444, 1431568384, 1365, 7444, 1431568384, 1365, 7444, 1431568384, 1365, 7444, 1431568384, 1365, 7444, 1431568384, 1365, 7444, 1431568384, 1365, 7444, 1431568384, 1365, 7448, 1431568384, 1365, 7448, 1431568384, 1365, 7448, 1431568384, 1365, 7448, 1431568384, 1365, 7448, 1431568384, 1365, 7448, 1431568384, 1365, 7448, 1431568384, 1365, 7448, 1431568384, 1365, 7448, 1431568384, 1365, 7448, 1431568384, 1365, 7448, 1431568384, 1365, 7448, 1431568384, 1365, 7448, 1431568384, 1365, 7456, 1431568384, 1365, 7456, 1431568384, 1365, 7456, 1431568384, 1365, 7456, 1431568384, 1365, 7456, 1431568384, 1365, 7456, 1431568384, 1365, 7456, 1431568384, 1365, 7456, 1431568384, 1365, 7456, 1431568384, 1365, 7456, 1431568384, 1365, 7456, 1431568384, 1365, 7456, 1431568384, 1365, 7456, 1431568384, 1365, 7460, 1431568384, 1365, 7460, 1431568384, 1365, 7460, 1431568384, 1365, 7460, 1431568384, 1365, 7460, 1431568384, 1365, 7460, 1431568384, 1365, 7460, 1431568384, 1365, 7460, 1431568384, 1365, 7460, 1431568384, 1365, 7460, 1431568384, 1365, 7460, 1431568384, 1365, 7460, 1431568384, 1365, 7460, 1431568384, 1365, 7464, 1431568384, 1365, 7464, 1431568384, 1365, 7464, 1431568384, 1365, 7464, 1431568384, 1365, 7464, 1431568384, 1365, 7464, 1431568384, 1365, 7464, 1431568384, 1365, 7464, 1431568384, 1365, 7464, 1431568384, 1365, 7464, 1431568384, 1365, 7464, 1431568384, 1365, 7464, 1431568384, 1365, 7464, 1431568384, 1365] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438873495494386_284_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438873495494386_284_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..db52f622 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438873495494386_284_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,221 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 4))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 61))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 46))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 18)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 49))) { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 2)) { + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3024, 268435472, 0, 3024, 268435472, 0, 3040, 268435472, 0, 3040, 268435472, 0, 3344, 0, 64, 3360, 0, 64, 4480, 10, 2684354560, 4480, 10, 2684354560, 4480, 10, 2684354560, 4480, 10, 2684354560, 5632, 10, 2684354560, 5632, 10, 2684354560, 5632, 10, 2684354560, 5632, 10, 2684354560, 7936, 10, 2684354560, 7936, 10, 2684354560, 7936, 10, 2684354560, 7936, 10, 2684354560, 8384, 163840, 0, 8384, 163840, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438880762519611_285_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438880762519611_285_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a99f5a18 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438880762519611_285_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,120 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 2: { + if 
(true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 8, 0, 2880, 545392672, 136348168, 2880, 545392672, 136348168, 2880, 545392672, 136348168, 2880, 545392672, 136348168, 2880, 545392672, 136348168, 2880, 545392672, 136348168, 2880, 545392672, 136348168, 2880, 545392672, 136348168, 2880, 545392672, 136348168, 2880, 545392672, 136348168] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438886614414418_287_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438886614414418_287_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..822c9827 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438886614414418_287_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,136 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 46)) { + if ((WaveGetLaneIndex() >= 36)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 50)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 47)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 53)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 0, 2454257664, 768, 0, 2454257664, 768, 0, 2454257664, 768, 0, 2454257664, 768, 0, 2454257664, 768, 0, 2454257664, 1216, 73, 0, 1216, 73, 0, 1216, 73, 0, 3328, 272696336, 68174084, 3328, 272696336, 68174084, 3328, 272696336, 68174084, 3328, 272696336, 68174084, 3328, 272696336, 68174084, 3328, 272696336, 68174084, 3328, 272696336, 68174084, 3328, 272696336, 68174084, 3328, 272696336, 68174084, 3328, 272696336, 68174084, 3648, 613566756, 1227133513, 3648, 613566756, 1227133513, 3648, 613566756, 1227133513, 3648, 613566756, 1227133513, 3648, 613566756, 1227133513, 3648, 613566756, 
1227133513, 3648, 613566756, 1227133513, 3648, 613566756, 1227133513, 3648, 613566756, 1227133513, 3648, 613566756, 1227133513, 3648, 613566756, 1227133513, 3648, 613566756, 1227133513, 3648, 613566756, 1227133513, 3648, 613566756, 1227133513, 3648, 613566756, 1227133513, 3648, 613566756, 1227133513, 3648, 613566756, 1227133513, 3648, 613566756, 1227133513, 3648, 613566756, 1227133513, 3648, 613566756, 1227133513, 3648, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438887138874988_288_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438887138874988_288_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0f0a1e69 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438887138874988_288_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,192 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 36)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 61)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 50)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 27))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 
threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3008, 69905, 286330880, 3008, 69905, 286330880, 3008, 69905, 286330880, 3008, 69905, 286330880, 3008, 69905, 286330880, 3008, 69905, 286330880, 3008, 69905, 286330880, 3008, 69905, 286330880, 3008, 69905, 286330880, 3008, 69905, 286330880, 5120, 4369, 286326784, 5120, 4369, 286326784, 5120, 4369, 286326784, 5120, 4369, 286326784, 5120, 4369, 286326784, 5120, 4369, 286326784, 5120, 4369, 286326784, 5120, 4369, 286326784, 8640, 559240, 0, 8640, 559240, 0, 8640, 559240, 0, 8640, 559240, 0, 8640, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438888792615541_289_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438888792615541_289_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4eb8d188 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438888792615541_289_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,194 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 54))) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + 
WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 53))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((91 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((100 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((105 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: 
{ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((122 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 426 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2304, 21845, 1430257664, 2304, 21845, 1430257664, 2304, 21845, 1430257664, 2304, 21845, 1430257664, 2304, 21845, 1430257664, 2304, 21845, 1430257664, 2304, 21845, 1430257664, 2304, 21845, 1430257664, 2304, 21845, 1430257664, 2304, 21845, 1430257664, 2304, 21845, 1430257664, 2304, 21845, 1430257664, 2304, 21845, 1430257664, 5204, 1, 4292870144, 5204, 1, 4292870144, 5204, 1, 4292870144, 5204, 1, 4292870144, 5204, 1, 4292870144, 5204, 1, 4292870144, 5204, 1, 4292870144, 5204, 1, 4292870144, 5204, 1, 4292870144, 5204, 1, 4292870144, 5204, 1, 4292870144, 5204, 1, 
4292870144, 5208, 1, 4292870144, 5208, 1, 4292870144, 5208, 1, 4292870144, 5208, 1, 4292870144, 5208, 1, 4292870144, 5208, 1, 4292870144, 5208, 1, 4292870144, 5208, 1, 4292870144, 5208, 1, 4292870144, 5208, 1, 4292870144, 5208, 1, 4292870144, 5208, 1, 4292870144, 5212, 1, 4292870144, 5212, 1, 4292870144, 5212, 1, 4292870144, 5212, 1, 4292870144, 5212, 1, 4292870144, 5212, 1, 4292870144, 5212, 1, 4292870144, 5212, 1, 4292870144, 5212, 1, 4292870144, 5212, 1, 4292870144, 5212, 1, 4292870144, 5212, 1, 4292870144, 5844, 9, 0, 5844, 9, 0, 5848, 9, 0, 5848, 9, 0, 5852, 9, 0, 5852, 9, 0, 6420, 0, 67108864, 6424, 0, 67108864, 6428, 0, 67108864, 6740, 4, 1226833920, 6740, 4, 1226833920, 6740, 4, 1226833920, 6740, 4, 1226833920, 6740, 4, 1226833920, 6744, 4, 1226833920, 6744, 4, 1226833920, 6744, 4, 1226833920, 6744, 4, 1226833920, 6744, 4, 1226833920, 6748, 4, 1226833920, 6748, 4, 1226833920, 6748, 4, 1226833920, 6748, 4, 1226833920, 6748, 4, 1226833920, 7828, 15, 4292870144, 7828, 15, 4292870144, 7828, 15, 4292870144, 7828, 15, 4292870144, 7828, 15, 4292870144, 7828, 15, 4292870144, 7828, 15, 4292870144, 7828, 15, 4292870144, 7828, 15, 4292870144, 7828, 15, 4292870144, 7828, 15, 4292870144, 7828, 15, 4292870144, 7828, 15, 4292870144, 7828, 15, 4292870144, 7828, 15, 4292870144, 7832, 15, 4292870144, 7832, 15, 4292870144, 7832, 15, 4292870144, 7832, 15, 4292870144, 7832, 15, 4292870144, 7832, 15, 4292870144, 7832, 15, 4292870144, 7832, 15, 4292870144, 7832, 15, 4292870144, 7832, 15, 4292870144, 7832, 15, 4292870144, 7832, 15, 4292870144, 7832, 15, 4292870144, 7832, 15, 4292870144, 7832, 15, 4292870144, 7836, 15, 4292870144, 7836, 15, 4292870144, 7836, 15, 4292870144, 7836, 15, 4292870144, 7836, 15, 4292870144, 7836, 15, 4292870144, 7836, 15, 4292870144, 7836, 15, 4292870144, 7836, 15, 4292870144, 7836, 15, 4292870144, 7836, 15, 4292870144, 7836, 15, 4292870144, 7836, 15, 4292870144, 7836, 15, 4292870144, 7836, 15, 4292870144, 8528, 524272, 0, 8528, 524272, 0, 8528, 524272, 
0, 8528, 524272, 0, 8528, 524272, 0, 8528, 524272, 0, 8528, 524272, 0, 8528, 524272, 0, 8528, 524272, 0, 8528, 524272, 0, 8528, 524272, 0, 8528, 524272, 0, 8528, 524272, 0, 8528, 524272, 0, 8528, 524272, 0, 8528, 15, 4160749568, 8528, 15, 4160749568, 8528, 15, 4160749568, 8528, 15, 4160749568, 8528, 15, 4160749568, 8528, 15, 4160749568, 8528, 15, 4160749568, 8528, 15, 4160749568, 8528, 15, 4160749568] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438897497718882_290_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438897497718882_290_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..270b1006 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438897497718882_290_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,305 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 63))) { + if (((WaveGetLaneIndex() & 1) == 1)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((146 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((182 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 27)) { + if ((WaveGetLaneIndex() == 42)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 53)) { + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
break; + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 564 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 2688, 68174084, 1090785345, 2688, 68174084, 1090785345, 2688, 68174084, 1090785345, 2688, 68174084, 1090785345, 2688, 68174084, 1090785345, 2688, 68174084, 1090785345, 2688, 68174084, 1090785345, 2688, 68174084, 1090785345, 2688, 68174084, 1090785345, 2688, 68174084, 1090785345, 2688, 68174084, 1090785345, 6464, 7, 2147483648, 6464, 7, 2147483648, 6464, 7, 2147483648, 6464, 7, 2147483648, 8448, 524288, 33555457, 8448, 524288, 33555457, 8448, 524288, 33555457, 8448, 524288, 33555457, 8464, 524288, 33555457, 8464, 524288, 33555457, 8464, 524288, 33555457, 8464, 
524288, 33555457, 9348, 32760, 0, 9348, 32760, 0, 9348, 32760, 0, 9348, 32760, 0, 9348, 32760, 0, 9348, 32760, 0, 9348, 32760, 0, 9348, 32760, 0, 9348, 32760, 0, 9348, 32760, 0, 9348, 32760, 0, 9348, 32760, 0, 9352, 32760, 0, 9352, 32760, 0, 9352, 32760, 0, 9352, 32760, 0, 9352, 32760, 0, 9352, 32760, 0, 9352, 32760, 0, 9352, 32760, 0, 9352, 32760, 0, 9352, 32760, 0, 9352, 32760, 0, 9352, 32760, 0, 9356, 32760, 0, 9356, 32760, 0, 9356, 32760, 0, 9356, 32760, 0, 9356, 32760, 0, 9356, 32760, 0, 9356, 32760, 0, 9356, 32760, 0, 9356, 32760, 0, 9356, 32760, 0, 9356, 32760, 0, 9356, 32760, 0, 9364, 32760, 0, 9364, 32760, 0, 9364, 32760, 0, 9364, 32760, 0, 9364, 32760, 0, 9364, 32760, 0, 9364, 32760, 0, 9364, 32760, 0, 9364, 32760, 0, 9364, 32760, 0, 9364, 32760, 0, 9364, 32760, 0, 9368, 32760, 0, 9368, 32760, 0, 9368, 32760, 0, 9368, 32760, 0, 9368, 32760, 0, 9368, 32760, 0, 9368, 32760, 0, 9368, 32760, 0, 9368, 32760, 0, 9368, 32760, 0, 9368, 32760, 0, 9368, 32760, 0, 9372, 32760, 0, 9372, 32760, 0, 9372, 32760, 0, 9372, 32760, 0, 9372, 32760, 0, 9372, 32760, 0, 9372, 32760, 0, 9372, 32760, 0, 9372, 32760, 0, 9372, 32760, 0, 9372, 32760, 0, 9372, 32760, 0, 10752, 1, 2449473536, 10752, 1, 2449473536, 10752, 1, 2449473536, 10752, 1, 2449473536, 10768, 1, 2449473536, 10768, 1, 2449473536, 10768, 1, 2449473536, 10768, 1, 2449473536, 10784, 1, 2449473536, 10784, 1, 2449473536, 10784, 1, 2449473536, 10784, 1, 2449473536, 11648, 136348168, 2181570690, 11648, 136348168, 2181570690, 11648, 136348168, 2181570690, 11648, 136348168, 2181570690, 11648, 136348168, 2181570690, 11648, 136348168, 2181570690, 11648, 136348168, 2181570690, 11648, 136348168, 2181570690, 11648, 136348168, 2181570690, 11648, 136348168, 2181570690, 11648, 136348168, 2181570690, 11664, 136348168, 2181570690, 11664, 136348168, 2181570690, 11664, 136348168, 2181570690, 11664, 136348168, 2181570690, 11664, 136348168, 2181570690, 11664, 136348168, 2181570690, 11664, 136348168, 2181570690, 11664, 136348168, 
2181570690, 11664, 136348168, 2181570690, 11664, 136348168, 2181570690, 11664, 136348168, 2181570690, 11680, 136348168, 2181570690, 11680, 136348168, 2181570690, 11680, 136348168, 2181570690, 11680, 136348168, 2181570690, 11680, 136348168, 2181570690, 11680, 136348168, 2181570690, 11680, 136348168, 2181570690, 11680, 136348168, 2181570690, 11680, 136348168, 2181570690, 11680, 136348168, 2181570690, 11680, 136348168, 2181570690, 12352, 1, 2453667840, 12352, 1, 2453667840, 12352, 1, 2453667840, 12352, 1, 2453667840, 12352, 1, 2453667840, 12368, 1, 2453667840, 12368, 1, 2453667840, 12368, 1, 2453667840, 12368, 1, 2453667840, 12368, 1, 2453667840, 12384, 1, 2453667840, 12384, 1, 2453667840, 12384, 1, 2453667840, 12384, 1, 2453667840, 12384, 1, 2453667840, 16384, 272696336, 68174084, 16384, 272696336, 68174084, 16384, 272696336, 68174084, 16384, 272696336, 68174084, 16384, 272696336, 68174084, 16384, 272696336, 68174084, 16384, 272696336, 68174084, 16384, 272696336, 68174084, 16384, 272696336, 68174084, 16384, 272696336, 68174084, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513, 16704, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438962682148757_291_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438962682148757_291_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..17450861 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438962682148757_291_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,99 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 46))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 41)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((58 << 6) | (i0 << 4)) | (counter1 << 2)) | counter2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 2)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3717, 0, 16384, 3718, 0, 16384, 3719, 0, 16384, 3721, 0, 16384, 3722, 0, 16384, 3723, 0, 16384, 3733, 0, 16384, 3734, 0, 16384, 3735, 0, 16384, 3737, 0, 16384, 3738, 0, 16384, 3739, 0, 16384] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438967742659745_292_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438967742659745_292_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f7d13803 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438967742659745_292_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,219 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 60))) { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((68 << 6) | (i0 << 4)) | 
(i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + } + } else { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 53)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 48))) { + for (uint i2 = 0; 
(i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 42)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 50))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 61)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3648, 0, 268435456, 3652, 0, 268435456, 3656, 0, 268435456, 3664, 0, 268435456, 3668, 0, 268435456, 3672, 0, 268435456, 3680, 0, 268435456, 3684, 0, 268435456, 3688, 0, 268435456, 4352, 0, 268435456, 4356, 0, 268435456, 4360, 0, 268435456, 4368, 0, 268435456, 4372, 0, 268435456, 4376, 0, 268435456, 4384, 0, 268435456, 4388, 0, 268435456, 4392, 0, 268435456, 4992, 17, 0, 4992, 17, 0, 6528, 0, 16777216, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 12160, 0, 2147483648] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438970284189264_293_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438970284189264_293_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0581dcca --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438970284189264_293_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,69 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 60)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756438970444823892_294_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756438970444823892_294_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0edd5b1b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756438970444823892_294_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,506 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 34)) { + if ((WaveGetLaneIndex() == 55)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 47)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 10)) { + if ((WaveGetLaneIndex() < 28)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if 
((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 56)) { + if ((WaveGetLaneIndex() >= 63)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 49))) { + if (((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || 
(WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 10))) { + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((i1 == 1)) { + continue; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((327 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 40))) { + if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((345 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if 
(((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((366 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() >= 37)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((376 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 22)) { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((398 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 22)) { + if ((WaveGetLaneIndex() == 50)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((408 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((415 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
(((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((426 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (437 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((456 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((465 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 49))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (495 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (514 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 21)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (521 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (526 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (530 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (534 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 333 + - Name: expected_bit_patterns + Format: UInt32 + 
Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 2304, 1, 0, 4160, 268501008, 1048832, 4160, 268501008, 1048832, 4160, 268501008, 1048832, 4160, 268501008, 1048832, 4160, 268501008, 1048832, 6272, 1048832, 16781313, 6272, 1048832, 16781313, 6272, 1048832, 16781313, 6272, 1048832, 16781313, 6272, 1048832, 16781313, 18496, 512, 8192, 18496, 512, 8192, 20944, 67108864, 0, 20948, 67108864, 0, 20952, 67108864, 0, 20960, 67108864, 0, 20964, 67108864, 0, 20968, 67108864, 0, 20976, 67108864, 0, 20980, 67108864, 0, 20984, 67108864, 0, 24080, 0, 1145324608, 24080, 0, 1145324608, 24080, 0, 1145324608, 24080, 0, 1145324608, 24080, 0, 1145324608, 24080, 0, 1145324608, 24080, 0, 1145324608, 24096, 0, 1145324608, 24096, 0, 1145324608, 24096, 0, 1145324608, 24096, 0, 1145324608, 24096, 0, 1145324608, 24096, 0, 1145324608, 24096, 0, 1145324608, 24112, 0, 1145324608, 24112, 0, 1145324608, 24112, 0, 1145324608, 24112, 0, 1145324608, 24112, 0, 1145324608, 24112, 0, 1145324608, 24112, 0, 1145324608, 27264, 559240, 0, 27264, 559240, 0, 27264, 559240, 0, 27264, 559240, 0, 27264, 559240, 0, 27280, 559240, 0, 27280, 
559240, 0, 27280, 559240, 0, 27280, 559240, 0, 27280, 559240, 0, 27296, 559240, 0, 27296, 559240, 0, 27296, 559240, 0, 27296, 559240, 0, 27296, 559240, 0, 27968, 0, 2290089984, 27968, 0, 2290089984, 27968, 0, 2290089984, 29776, 134217728, 2148008064, 29776, 134217728, 2148008064, 29776, 134217728, 2148008064, 29776, 134217728, 2148008064, 29792, 134217728, 2148008064, 29792, 134217728, 2148008064, 29792, 134217728, 2148008064, 29792, 134217728, 2148008064, 29808, 134217728, 2148008064, 29808, 134217728, 2148008064, 29808, 134217728, 2148008064, 29808, 134217728, 2148008064, 33664, 8388608, 134250504, 33664, 8388608, 134250504, 33664, 8388608, 134250504, 33664, 8388608, 134250504] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439074097231611_296_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439074097231611_296_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6bcde254 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439074097231611_296_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,282 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 24)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 43))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || 
(WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 39)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i1 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 35)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((177 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((186 << 6) | (i2 << 4)) | 
(counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 507 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1616, 256, 0, 1632, 256, 0, 1648, 256, 0, 10304, 536870912, 131072, 10304, 536870912, 131072, 10320, 536870912, 131072, 10320, 536870912, 131072, 11332, 572662306, 572662306, 11332, 572662306, 572662306, 11332, 572662306, 572662306, 11332, 572662306, 572662306, 11332, 572662306, 572662306, 11332, 572662306, 572662306, 11332, 572662306, 572662306, 11332, 572662306, 572662306, 11332, 572662306, 572662306, 11332, 572662306, 572662306, 11332, 572662306, 572662306, 11332, 572662306, 572662306, 11332, 572662306, 572662306, 11332, 572662306, 572662306, 11332, 572662306, 572662306, 11332, 572662306, 572662306, 11336, 572662306, 572662306, 11336, 572662306, 572662306, 11336, 572662306, 572662306, 11336, 572662306, 572662306, 11336, 572662306, 572662306, 11336, 572662306, 572662306, 11336, 572662306, 572662306, 11336, 572662306, 572662306, 11336, 572662306, 572662306, 11336, 572662306, 572662306, 11336, 572662306, 572662306, 11336, 572662306, 572662306, 11336, 572662306, 572662306, 11336, 572662306, 572662306, 11336, 572662306, 572662306, 11336, 572662306, 572662306, 11348, 572662306, 572662306, 11348, 572662306, 572662306, 11348, 572662306, 572662306, 11348, 572662306, 572662306, 11348, 572662306, 572662306, 11348, 572662306, 572662306, 11348, 572662306, 572662306, 11348, 572662306, 572662306, 11348, 572662306, 572662306, 
11348, 572662306, 572662306, 11348, 572662306, 572662306, 11348, 572662306, 572662306, 11348, 572662306, 572662306, 11348, 572662306, 572662306, 11348, 572662306, 572662306, 11348, 572662306, 572662306, 11352, 572662306, 572662306, 11352, 572662306, 572662306, 11352, 572662306, 572662306, 11352, 572662306, 572662306, 11352, 572662306, 572662306, 11352, 572662306, 572662306, 11352, 572662306, 572662306, 11352, 572662306, 572662306, 11352, 572662306, 572662306, 11352, 572662306, 572662306, 11352, 572662306, 572662306, 11352, 572662306, 572662306, 11352, 572662306, 572662306, 11352, 572662306, 572662306, 11352, 572662306, 572662306, 11352, 572662306, 572662306, 13056, 0, 512, 13072, 0, 512, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 13568, 1717986918, 1717986918, 14464, 2290649224, 2290649224, 14464, 2290649224, 2290649224, 14464, 2290649224, 2290649224, 14464, 2290649224, 2290649224, 14464, 2290649224, 2290649224, 14464, 2290649224, 2290649224, 14464, 2290649224, 2290649224, 14464, 2290649224, 2290649224, 14464, 2290649224, 2290649224, 14464, 2290649224, 2290649224, 
14464, 2290649224, 2290649224, 14464, 2290649224, 2290649224, 14464, 2290649224, 2290649224, 14464, 2290649224, 2290649224, 14464, 2290649224, 2290649224, 14464, 2290649224, 2290649224, 15680, 2290649224, 2290649224, 15680, 2290649224, 2290649224, 15680, 2290649224, 2290649224, 15680, 2290649224, 2290649224, 15680, 2290649224, 2290649224, 15680, 2290649224, 2290649224, 15680, 2290649224, 2290649224, 15680, 2290649224, 2290649224, 15680, 2290649224, 2290649224, 15680, 2290649224, 2290649224, 15680, 2290649224, 2290649224, 15680, 2290649224, 2290649224, 15680, 2290649224, 2290649224, 15680, 2290649224, 2290649224, 15680, 2290649224, 2290649224, 15680, 2290649224, 2290649224, 15696, 2290649224, 2290649224, 15696, 2290649224, 2290649224, 15696, 2290649224, 2290649224, 15696, 2290649224, 2290649224, 15696, 2290649224, 2290649224, 15696, 2290649224, 2290649224, 15696, 2290649224, 2290649224, 15696, 2290649224, 2290649224, 15696, 2290649224, 2290649224, 15696, 2290649224, 2290649224, 15696, 2290649224, 2290649224, 15696, 2290649224, 2290649224, 15696, 2290649224, 2290649224, 15696, 2290649224, 2290649224, 15696, 2290649224, 2290649224, 15696, 2290649224, 2290649224, 15712, 2290649224, 2290649224, 15712, 2290649224, 2290649224, 15712, 2290649224, 2290649224, 15712, 2290649224, 2290649224, 15712, 2290649224, 2290649224, 15712, 2290649224, 2290649224, 15712, 2290649224, 2290649224, 15712, 2290649224, 2290649224, 15712, 2290649224, 2290649224, 15712, 2290649224, 2290649224, 15712, 2290649224, 2290649224, 15712, 2290649224, 2290649224, 15712, 2290649224, 2290649224, 15712, 2290649224, 2290649224, 15712, 2290649224, 2290649224, 15712, 2290649224, 2290649224] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439100784687289_299_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439100784687289_299_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9c109b06 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439100784687289_299_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,251 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (i0 << 4)) | (i1 << 2)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((152 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((161 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + } + if ((counter2 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } 
+ case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 336 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2880, 279620, 1145323520, 2880, 279620, 1145323520, 2880, 279620, 1145323520, 2880, 279620, 1145323520, 2880, 279620, 1145323520, 2880, 279620, 1145323520, 2880, 279620, 1145323520, 2880, 279620, 1145323520, 2880, 279620, 1145323520, 2880, 279620, 1145323520, 2884, 279620, 1145323520, 2884, 279620, 1145323520, 2884, 279620, 1145323520, 2884, 279620, 1145323520, 2884, 279620, 1145323520, 2884, 279620, 1145323520, 2884, 279620, 1145323520, 2884, 279620, 1145323520, 2884, 279620, 1145323520, 2884, 279620, 1145323520, 2896, 279620, 1145323520, 2896, 279620, 1145323520, 2896, 279620, 1145323520, 2896, 279620, 1145323520, 2896, 279620, 1145323520, 2896, 279620, 1145323520, 2896, 279620, 1145323520, 2896, 279620, 1145323520, 2896, 279620, 1145323520, 
2896, 279620, 1145323520, 2900, 279620, 1145323520, 2900, 279620, 1145323520, 2900, 279620, 1145323520, 2900, 279620, 1145323520, 2900, 279620, 1145323520, 2900, 279620, 1145323520, 2900, 279620, 1145323520, 2900, 279620, 1145323520, 2900, 279620, 1145323520, 2900, 279620, 1145323520, 3712, 68, 1140850688, 3712, 68, 1140850688, 3712, 68, 1140850688, 3712, 68, 1140850688, 3716, 68, 1140850688, 3716, 68, 1140850688, 3716, 68, 1140850688, 3716, 68, 1140850688, 3728, 68, 1140850688, 3728, 68, 1140850688, 3728, 68, 1140850688, 3728, 68, 1140850688, 3732, 68, 1140850688, 3732, 68, 1140850688, 3732, 68, 1140850688, 3732, 68, 1140850688, 4864, 0, 67108864, 4880, 0, 67108864, 5312, 559240, 0, 5312, 559240, 0, 5312, 559240, 0, 5312, 559240, 0, 5312, 559240, 0, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 11584, 1717986918, 1717986918, 12032, 978670, 0, 12032, 978670, 0, 12032, 978670, 0, 12032, 978670, 0, 12032, 978670, 0, 12032, 978670, 0, 12032, 978670, 0, 12032, 978670, 0, 12032, 978670, 0, 12032, 978670, 0, 12032, 978670, 0, 12032, 978670, 0, 12032, 978670, 0, 12032, 
978670, 0, 12032, 978670, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439117465735402_300_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439117465735402_300_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5cd8c62e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439117465735402_300_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,303 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + } + if ((WaveGetLaneIndex() >= 62)) { + if ((WaveGetLaneIndex() >= 34)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((77 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 58)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((87 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 1)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() < 22)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 45))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) 
== 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 0, 3221225472, 1792, 0, 3221225472, 8448, 4, 16777224, 8448, 4, 16777224, 8448, 4, 16777224, 9088, 7, 0, 9088, 7, 0, 9088, 7, 0, 11200, 2, 0, 11520, 4, 0, 12160, 73, 0, 12160, 73, 0, 12160, 73, 0, 12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 
12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 12736, 1363481681, 340870420, 13888, 292, 1207959552, 13888, 292, 1207959552, 13888, 292, 1207959552, 13888, 292, 1207959552, 13888, 292, 1207959552, 15872, 613564416, 4681, 15872, 613564416, 4681, 15872, 613564416, 4681, 15872, 613564416, 4681, 15872, 613564416, 4681, 15872, 613564416, 4681, 15872, 613564416, 4681, 15872, 613564416, 4681, 15872, 613564416, 4681, 15872, 613564416, 4681, 15872, 613564416, 4681] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439120508018579_301_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439120508018579_301_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e8b5e545 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439120508018579_301_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,328 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 1) 
|| (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((33 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 42))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((100 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 51))) { + if (((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 43))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((244 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 11) || 
(WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 41))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((291 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() < 29)) { + if ((WaveGetLaneIndex() < 19)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((323 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 25)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((330 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((341 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 750 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2128, 1, 268435456, 2128, 1, 268435456, 2132, 1, 268435456, 2132, 1, 268435456, 2136, 1, 268435456, 2136, 1, 268435456, 2144, 1, 268435456, 2144, 1, 268435456, 2148, 1, 268435456, 2148, 1, 268435456, 2152, 1, 268435456, 2152, 1, 268435456, 2160, 1, 268435456, 2160, 1, 268435456, 2164, 1, 268435456, 2164, 1, 268435456, 2168, 1, 268435456, 2168, 1, 268435456, 4288, 2, 572653568, 4288, 2, 572653568, 4288, 2, 572653568, 4288, 2, 572653568, 4288, 2, 572653568, 6404, 2, 572661760, 6404, 2, 572661760, 6404, 2, 572661760, 6404, 2, 572661760, 6404, 2, 572661760, 6404, 2, 572661760, 6408, 2, 572661760, 6408, 2, 572661760, 6408, 2, 572661760, 6408, 2, 572661760, 6408, 2, 572661760, 6408, 2, 572661760, 6420, 2, 572661760, 6420, 2, 572661760, 6420, 2, 
572661760, 6420, 2, 572661760, 6420, 2, 572661760, 6420, 2, 572661760, 6424, 2, 572661760, 6424, 2, 572661760, 6424, 2, 572661760, 6424, 2, 572661760, 6424, 2, 572661760, 6424, 2, 572661760, 6436, 2, 572661760, 6436, 2, 572661760, 6436, 2, 572661760, 6436, 2, 572661760, 6436, 2, 572661760, 6436, 2, 572661760, 6440, 2, 572661760, 6440, 2, 572661760, 6440, 2, 572661760, 6440, 2, 572661760, 6440, 2, 572661760, 6440, 2, 572661760, 6976, 2, 572661760, 6976, 2, 572661760, 6976, 2, 572661760, 6976, 2, 572661760, 6976, 2, 572661760, 6976, 2, 572661760, 6992, 2, 572661760, 6992, 2, 572661760, 6992, 2, 572661760, 6992, 2, 572661760, 6992, 2, 572661760, 6992, 2, 572661760, 7008, 2, 572661760, 7008, 2, 572661760, 7008, 2, 572661760, 7008, 2, 572661760, 7008, 2, 572661760, 7008, 2, 572661760, 7872, 2, 572522496, 7872, 2, 572522496, 7872, 2, 572522496, 7872, 2, 572522496, 11200, 33554432, 0, 11520, 1145324612, 1145324612, 11520, 1145324612, 1145324612, 11520, 1145324612, 1145324612, 11520, 1145324612, 1145324612, 11520, 1145324612, 1145324612, 11520, 1145324612, 1145324612, 11520, 1145324612, 1145324612, 11520, 1145324612, 1145324612, 11520, 1145324612, 1145324612, 11520, 1145324612, 1145324612, 11520, 1145324612, 1145324612, 11520, 1145324612, 1145324612, 11520, 1145324612, 1145324612, 11520, 1145324612, 1145324612, 11520, 1145324612, 1145324612, 11520, 1145324612, 1145324612, 13696, 8, 0, 13712, 8, 0, 13728, 8, 0, 17408, 0, 2148007936, 17408, 0, 2148007936, 17424, 0, 2148007936, 17424, 0, 2148007936, 17440, 0, 2148007936, 17440, 0, 2148007936, 19520, 8390656, 134250504, 19520, 8390656, 134250504, 19520, 8390656, 134250504, 19520, 8390656, 134250504, 19520, 8390656, 134250504, 20672, 524287, 0, 20672, 524287, 0, 20672, 524287, 0, 20672, 524287, 0, 20672, 524287, 0, 20672, 524287, 0, 20672, 524287, 0, 20672, 524287, 0, 20672, 524287, 0, 20672, 524287, 0, 20672, 524287, 0, 20672, 524287, 0, 20672, 524287, 0, 20672, 524287, 0, 20672, 524287, 0, 20672, 524287, 0, 20672, 524287, 0, 
20672, 524287, 0, 20672, 524287, 0, 20688, 524287, 0, 20688, 524287, 0, 20688, 524287, 0, 20688, 524287, 0, 20688, 524287, 0, 20688, 524287, 0, 20688, 524287, 0, 20688, 524287, 0, 20688, 524287, 0, 20688, 524287, 0, 20688, 524287, 0, 20688, 524287, 0, 20688, 524287, 0, 20688, 524287, 0, 20688, 524287, 0, 20688, 524287, 0, 20688, 524287, 0, 20688, 524287, 0, 20688, 524287, 0, 20704, 524287, 0, 20704, 524287, 0, 20704, 524287, 0, 20704, 524287, 0, 20704, 524287, 0, 20704, 524287, 0, 20704, 524287, 0, 20704, 524287, 0, 20704, 524287, 0, 20704, 524287, 0, 20704, 524287, 0, 20704, 524287, 0, 20704, 524287, 0, 20704, 524287, 0, 20704, 524287, 0, 20704, 524287, 0, 20704, 524287, 0, 20704, 524287, 0, 20704, 524287, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21120, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21136, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 
21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21152, 33554431, 0, 21824, 2048, 16, 21824, 2048, 16, 21840, 2048, 16, 21840, 2048, 16, 21856, 2048, 16, 21856, 2048, 16] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439271522187800_302_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439271522187800_302_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..15ae1e5a --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439271522187800_302_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 54)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 123 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4160, 2147483664, 33587208, 4160, 2147483664, 33587208, 4160, 2147483664, 33587208, 4160, 2147483664, 33587208, 4160, 2147483664, 33587208, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 
3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3776, 715827882, 2829724322, 3520, 67108864, 1048592, 3520, 67108864, 1048592, 3520, 67108864, 1048592, 3136, 0, 1430257664, 3136, 0, 1430257664, 3136, 0, 1430257664, 3136, 0, 1430257664, 3136, 0, 1430257664] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439271941971278_303_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439271941971278_303_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9fa42611 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439271941971278_303_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,163 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 51))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 53)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 
1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 57)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 294 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 1430257664, 1280, 1, 1430257664, 1280, 1, 1430257664, 1280, 1, 1430257664, 1280, 1, 1430257664, 1280, 1, 1430257664, 4096, 1, 1409286144, 4096, 1, 1409286144, 4096, 1, 1409286144, 4096, 1, 1409286144, 5312, 65536, 0, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6352, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 
1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6368, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 6384, 1431655764, 349525, 8464, 1073741844, 16, 8464, 1073741844, 16, 8464, 1073741844, 16, 8464, 1073741844, 16, 8480, 1073741844, 16, 8480, 1073741844, 16, 8480, 1073741844, 16, 8480, 1073741844, 16, 8496, 1073741844, 16, 8496, 1073741844, 16, 8496, 1073741844, 16, 8496, 1073741844, 16] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439295745756860_305_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439295745756860_305_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..28398b06 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439295745756860_305_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,82 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 
1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439296028433453_306_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439296028433453_306_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8ee50294 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439296028433453_306_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,353 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 56))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((125 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 49))) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 56)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 
1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((226 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 43))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (286 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 61)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (300 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (304 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 58))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (333 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 45))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (366 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (375 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (380 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 2176, 1, 285212672, 2176, 1, 285212672, 2176, 1, 285212672, 4480, 1, 285212672, 4480, 1, 285212672, 4480, 1, 285212672, 5056, 286331152, 1118481, 5056, 286331152, 1118481, 5056, 286331152, 1118481, 5056, 286331152, 1118481, 5056, 286331152, 1118481, 5056, 286331152, 1118481, 5056, 286331152, 1118481, 5056, 286331152, 1118481, 5056, 286331152, 1118481, 5056, 286331152, 1118481, 5056, 286331152, 1118481, 5056, 286331152, 1118481, 5056, 286331152, 1118481, 5056, 1, 
285212672, 5056, 1, 285212672, 5056, 1, 285212672, 11200, 0, 1145044992, 11200, 0, 1145044992, 11200, 0, 1145044992, 24320, 8390656, 134250504, 24320, 8390656, 134250504, 24320, 8390656, 134250504, 24320, 8390656, 134250504, 24320, 8390656, 134250504] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439298336192682_307_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439298336192682_307_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e337bcd1 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439298336192682_307_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,191 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 40)) { + if 
((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 44)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1984, 272696336, 68174084, 1984, 272696336, 68174084, 1984, 272696336, 68174084, 1984, 272696336, 68174084, 1984, 272696336, 
68174084, 1984, 272696336, 68174084, 1984, 272696336, 68174084, 1984, 272696336, 68174084, 1984, 272696336, 68174084, 1984, 272696336, 68174084, 4224, 545392672, 136348168, 4224, 545392672, 136348168, 4224, 545392672, 136348168, 4224, 545392672, 136348168, 4224, 545392672, 136348168, 4224, 545392672, 136348168, 4224, 545392672, 136348168, 4224, 545392672, 136348168, 4224, 545392672, 136348168, 4224, 545392672, 136348168, 5904, 0, 2097152, 5920, 0, 2097152, 7424, 67108864, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439378180562594_309_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439378180562594_309_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c82a751d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439378180562594_309_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,116 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 53))) { + result = 
(result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((29 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((47 << 6) | (i0 << 4)) | (counter1 << 2)) | i2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 54))) { + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((82 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + if ((i0 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 432 + - Name: expected_bit_patterns + Format: UInt32 
+ Stride: 4 + Data: [1860, 8389632, 2097152, 1860, 8389632, 2097152, 1860, 8389632, 2097152, 1864, 8389632, 2097152, 1864, 8389632, 2097152, 1864, 8389632, 2097152, 1876, 8389632, 2097152, 1876, 8389632, 2097152, 1876, 8389632, 2097152, 1880, 8389632, 2097152, 1880, 8389632, 2097152, 1880, 8389632, 2097152, 1892, 8389632, 2097152, 1892, 8389632, 2097152, 1892, 8389632, 2097152, 1896, 8389632, 2097152, 1896, 8389632, 2097152, 1896, 8389632, 2097152, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3588, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3592, 1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 
1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 1363481681, 340870420, 3604, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3608, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3620, 1363481681, 340870420, 3624, 1363481681, 340870420, 3624, 1363481681, 340870420, 3624, 1363481681, 340870420, 3624, 1363481681, 340870420, 3624, 1363481681, 340870420, 3624, 1363481681, 340870420, 3624, 1363481681, 340870420, 3624, 1363481681, 340870420, 3624, 1363481681, 340870420, 3624, 1363481681, 340870420, 3624, 1363481681, 340870420, 3624, 1363481681, 340870420, 3624, 1363481681, 340870420, 3624, 1363481681, 340870420, 3624, 1363481681, 340870420, 3624, 1363481681, 340870420, 3624, 1363481681, 340870420, 
3624, 1363481681, 340870420, 3624, 1363481681, 340870420, 3624, 1363481681, 340870420, 3624, 1363481681, 340870420] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439524659418479_311_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439524659418479_311_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0a8c68b9 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439524659418479_311_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,189 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 58))) { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if 
((WaveGetLaneIndex() == 62)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 33)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 54)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 26)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((WaveGetLaneIndex() < 32)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 57))) { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 45)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 324 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 3, 4227858432, 1600, 3, 
4227858432, 1600, 3, 4227858432, 1600, 3, 4227858432, 1600, 3, 4227858432, 1600, 3, 4227858432, 1600, 3, 4227858432, 1600, 3, 4227858432, 1616, 3, 4227858432, 1616, 3, 4227858432, 1616, 3, 4227858432, 1616, 3, 4227858432, 1616, 3, 4227858432, 1616, 3, 4227858432, 1616, 3, 4227858432, 1616, 3, 4227858432, 4352, 2, 0, 4368, 2, 0, 4672, 0, 1207959552, 4672, 0, 1207959552, 4688, 0, 1207959552, 4688, 0, 1207959552, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5248, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 5264, 4294967292, 0, 8576, 0, 67100672, 8576, 0, 67100672, 8576, 0, 67100672, 8576, 0, 67100672, 8576, 0, 67100672, 8576, 0, 67100672, 8576, 0, 67100672, 8576, 0, 67100672, 8576, 0, 67100672, 8576, 0, 67100672, 8576, 0, 67100672, 8576, 0, 67100672, 8576, 0, 67100672, 8592, 0, 67100672, 8592, 0, 67100672, 8592, 0, 67100672, 8592, 0, 67100672, 
8592, 0, 67100672, 8592, 0, 67100672, 8592, 0, 67100672, 8592, 0, 67100672, 8592, 0, 67100672, 8592, 0, 67100672, 8592, 0, 67100672, 8592, 0, 67100672, 8592, 0, 67100672] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439542853488740_313_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439542853488740_313_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cd486c4e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439542853488740_313_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,108 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 20)) { + result = 
(result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 444 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2000, 299593, 0, 2000, 299593, 0, 2000, 299593, 0, 2000, 299593, 0, 2000, 299593, 0, 2000, 299593, 0, 2000, 299593, 0, 2004, 299593, 0, 2004, 299593, 0, 2004, 299593, 0, 2004, 299593, 0, 2004, 299593, 0, 2004, 299593, 0, 2004, 299593, 0, 2008, 299593, 0, 2008, 299593, 0, 2008, 299593, 0, 2008, 299593, 0, 2008, 299593, 0, 2008, 299593, 0, 2008, 299593, 0, 2016, 299593, 0, 2016, 299593, 0, 2016, 299593, 0, 2016, 299593, 0, 2016, 299593, 0, 2016, 299593, 0, 2016, 299593, 0, 2020, 299593, 0, 2020, 299593, 0, 2020, 299593, 0, 2020, 299593, 0, 2020, 299593, 0, 2020, 299593, 0, 2020, 299593, 0, 2024, 299593, 0, 2024, 299593, 0, 2024, 299593, 0, 2024, 299593, 0, 2024, 299593, 0, 2024, 299593, 0, 2024, 299593, 0, 2032, 299593, 0, 2032, 299593, 0, 2032, 299593, 
0, 2032, 299593, 0, 2032, 299593, 0, 2032, 299593, 0, 2032, 299593, 0, 2036, 299593, 0, 2036, 299593, 0, 2036, 299593, 0, 2036, 299593, 0, 2036, 299593, 0, 2036, 299593, 0, 2036, 299593, 0, 2040, 299593, 0, 2040, 299593, 0, 2040, 299593, 0, 2040, 299593, 0, 2040, 299593, 0, 2040, 299593, 0, 2040, 299593, 0, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2560, 1363481681, 340870420, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 
2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295, 2880, 4294967295, 4294967295] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439546382930720_314_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439546382930720_314_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e5331f2b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439546382930720_314_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,159 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 
42)) || (WaveGetLaneIndex() == 63))) { + if (((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 75 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 
4 + Data: [576, 17, 0, 576, 17, 0, 4672, 32, 0, 4688, 32, 0, 4992, 1145324612, 1145324612, 4992, 1145324612, 1145324612, 4992, 1145324612, 1145324612, 4992, 1145324612, 1145324612, 4992, 1145324612, 1145324612, 4992, 1145324612, 1145324612, 4992, 1145324612, 1145324612, 4992, 1145324612, 1145324612, 4992, 1145324612, 1145324612, 4992, 1145324612, 1145324612, 4992, 1145324612, 1145324612, 4992, 1145324612, 1145324612, 4992, 1145324612, 1145324612, 4992, 1145324612, 1145324612, 4992, 1145324612, 1145324612, 4992, 1145324612, 1145324612, 5440, 559240, 0, 5440, 559240, 0, 5440, 559240, 0, 5440, 559240, 0, 5440, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439547008355150_315_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439547008355150_315_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6aca3339 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439547008355150_315_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,363 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() >= 58)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveMin(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 54)) { + if ((WaveGetLaneIndex() >= 60)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 57)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 43)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 39)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 58))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 40))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((274 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((283 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((320 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 47))) { + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((354 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((377 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((396 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((411 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 4480, 16, 0, 4496, 16, 0, 5184, 1040, 1048576, 5184, 1040, 1048576, 5184, 1040, 1048576, 5200, 1040, 1048576, 5200, 
1040, 1048576, 5200, 1040, 1048576, 7040, 0, 536870912, 8000, 0, 536870912, 8016, 0, 536870912, 8032, 0, 536870912, 8448, 0, 545259520, 8448, 0, 545259520, 8464, 0, 545259520, 8464, 0, 545259520, 8480, 0, 545259520, 8480, 0, 545259520, 11520, 2048, 16777217, 11520, 2048, 16777217, 11520, 2048, 16777217, 11536, 2048, 16777217, 11536, 2048, 16777217, 11536, 2048, 16777217, 16576, 1073742080, 65536, 16576, 1073742080, 65536, 16576, 1073742080, 65536, 18112, 4096, 0, 18128, 4096, 0, 18144, 4096, 0, 19072, 4194304, 0, 20496, 2097152, 8388608, 20496, 2097152, 8388608, 20512, 2097152, 8388608, 20512, 2097152, 8388608, 22672, 0, 8192, 22688, 0, 8192, 25360, 131104, 0, 25360, 131104, 0, 25376, 131104, 0, 25376, 131104, 0, 26320, 0, 512, 26336, 0, 512] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439931641770194_318_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439931641770194_318_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3918cc15 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439931641770194_318_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,317 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-word records per wave-op site: tag word, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] is the write cursor into _participant_bit, advanced by 3 via InterlockedAdd */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); +
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 45))) { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 21)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 33)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 50)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 48))) { + if (((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((258 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter4 = 0; + while ((counter4 < 3)) { + 
counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((289 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + break; + } + } + if (((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (311 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 348 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1472, 85, 1431568384, 1472, 85, 1431568384, 1472, 85, 1431568384, 1472, 85, 1431568384, 1472, 85, 1431568384, 1472, 85, 1431568384, 1472, 85, 1431568384, 1472, 85, 1431568384, 1472, 85, 1431568384, 1472, 85, 1431568384, 1472, 85, 1431568384, 1488, 85, 1431568384, 1488, 85, 1431568384, 1488, 85, 1431568384, 1488, 85, 1431568384, 1488, 85, 1431568384, 1488, 85, 1431568384, 1488, 85, 1431568384, 1488, 85, 1431568384, 1488, 85, 1431568384, 1488, 85, 1431568384, 1488, 85, 1431568384, 2688, 
0, 65536, 2692, 0, 65536, 2696, 0, 65536, 2704, 0, 65536, 2708, 0, 65536, 2712, 0, 65536, 3776, 5461, 1431306240, 3776, 5461, 1431306240, 3776, 5461, 1431306240, 3776, 5461, 1431306240, 3776, 5461, 1431306240, 3776, 5461, 1431306240, 3776, 5461, 1431306240, 3776, 5461, 1431306240, 3776, 5461, 1431306240, 3776, 5461, 1431306240, 3776, 5461, 1431306240, 3776, 5461, 1431306240, 3776, 5461, 1431306240, 3792, 5461, 1431306240, 3792, 5461, 1431306240, 3792, 5461, 1431306240, 3792, 5461, 1431306240, 3792, 5461, 1431306240, 3792, 5461, 1431306240, 3792, 5461, 1431306240, 3792, 5461, 1431306240, 3792, 5461, 1431306240, 3792, 5461, 1431306240, 3792, 5461, 1431306240, 3792, 5461, 1431306240, 3792, 5461, 1431306240, 5056, 42, 2852126720, 5056, 42, 2852126720, 5056, 42, 2852126720, 5056, 42, 2852126720, 5056, 42, 2852126720, 5056, 42, 2852126720, 5056, 42, 2852126720, 7424, 42, 2852126720, 7424, 42, 2852126720, 7424, 42, 2852126720, 7424, 42, 2852126720, 7424, 42, 2852126720, 7424, 42, 2852126720, 7424, 42, 2852126720, 14080, 67108864, 0, 16512, 64, 1145323520, 16512, 64, 1145323520, 16512, 64, 1145323520, 16512, 64, 1145323520, 16512, 64, 1145323520, 16512, 64, 1145323520, 16528, 64, 1145323520, 16528, 64, 1145323520, 16528, 64, 1145323520, 16528, 64, 1145323520, 16528, 64, 1145323520, 16528, 64, 1145323520, 16544, 64, 1145323520, 16544, 64, 1145323520, 16544, 64, 1145323520, 16544, 64, 1145323520, 16544, 64, 1145323520, 16544, 64, 1145323520, 17344, 1088, 1145323520, 17344, 1088, 1145323520, 17344, 1088, 1145323520, 17344, 1088, 1145323520, 17344, 1088, 1145323520, 17344, 1088, 1145323520, 17344, 1088, 1145323520, 17360, 1088, 1145323520, 17360, 1088, 1145323520, 17360, 1088, 1145323520, 17360, 1088, 1145323520, 17360, 1088, 1145323520, 17360, 1088, 1145323520, 17360, 1088, 1145323520, 17376, 1088, 1145323520, 17376, 1088, 1145323520, 17376, 1088, 1145323520, 17376, 1088, 1145323520, 17376, 1088, 1145323520, 17376, 1088, 1145323520, 17376, 1088, 1145323520, 19904, 262144, 
262148, 19904, 262144, 262148, 19904, 262144, 262148, 20352, 559240, 0, 20352, 559240, 0, 20352, 559240, 0, 20352, 559240, 0, 20352, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439973190341424_319_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439973190341424_319_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dbf9fac3 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439973190341424_319_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,348 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-word records per wave-op site: tag word, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] is the write cursor into _participant_bit, advanced by 3 via InterlockedAdd */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 57))) { + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] =
ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 45)) { + if ((WaveGetLaneIndex() >= 58)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((234 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + } + case 1: { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 19))) { + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((343 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((354 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 52))) { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (386 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 50))) { + result = (result + 
WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (405 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (420 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (424 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (431 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (436 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (443 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 132 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [19776, 2, 570425344, 19776, 2, 570425344, 19776, 2, 570425344, 20480, 0, 536870912, 21952, 0, 8192, 21968, 0, 8192, 27584, 2, 0, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 27904, 1717986918, 1717986918, 28352, 559240, 0, 28352, 559240, 0, 28352, 559240, 0, 28352, 559240, 0, 28352, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439974760922838_320_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439974760922838_320_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..238f0783 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439974760922838_320_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,286 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 6))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 36)) { + if ((WaveGetLaneIndex() == 63)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 40))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 54))) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 44)) { + if ((WaveGetLaneIndex() == 63)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 36)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 13) || 
(WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2128, 0, 2, 2144, 0, 2, 2160, 0, 2, 5120, 272696336, 68174084, 5120, 272696336, 68174084, 5120, 272696336, 68174084, 5120, 272696336, 68174084, 5120, 272696336, 68174084, 5120, 272696336, 68174084, 5120, 272696336, 68174084, 5120, 272696336, 68174084, 5120, 272696336, 68174084, 5120, 272696336, 68174084, 6080, 4, 0, 8512, 68174084, 1090785345, 8512, 68174084, 1090785345, 8512, 68174084, 1090785345, 8512, 68174084, 1090785345, 8512, 68174084, 1090785345, 8512, 68174084, 1090785345, 8512, 68174084, 1090785345, 8512, 68174084, 1090785345, 8512, 68174084, 1090785345, 8512, 68174084, 1090785345, 8512, 68174084, 1090785345, 9472, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439977367034941_321_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439977367034941_321_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d6dd7d4e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439977367034941_321_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,105 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 41)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 34)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 # Same shape as expected_bit_patterns; no wave op is reachable in this shader + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439977590821383_322_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439977590821383_322_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3db690fa --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439977590821383_322_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,153 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 53))) { + if (((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 10))) { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((102 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((117 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((138 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i0 == 1)) { + continue; + } + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 61)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 39 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3072, 0, 2097152, 3088, 0, 2097152, 4228, 4096, 0, 4232, 4096, 0, 4236, 4096, 0, 4244, 4096, 0, 4248, 4096, 0, 4252, 4096, 0, 12160, 67108880, 134218752, 12160, 67108880, 134218752, 12160, 67108880, 134218752, 12160, 67108880, 134218752, 11776, 0, 536870912] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439978309726080_323_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439978309726080_323_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b3182e5c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439978309726080_323_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439978556012858_324_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439978556012858_324_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cac51aea --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439978556012858_324_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439978818846341_325_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439978818846341_325_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..eae7b0b7 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439978818846341_325_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,236 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 55))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 57)) { + if ((WaveGetLaneIndex() == 49)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((75 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((88 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() 
== 12)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 48)) { + if ((WaveGetLaneIndex() >= 63)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = ((155 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 60)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 44))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 
51 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6912, 17, 0, 6912, 17, 0, 9344, 34, 8192, 9344, 34, 8192, 9344, 34, 8192, 9360, 34, 8192, 9360, 34, 8192, 9360, 34, 8192, 12176, 0, 2147483648, 12192, 0, 2147483648, 12208, 0, 2147483648, 12880, 0, 2281701376, 12880, 0, 2281701376, 12896, 0, 2281701376, 12896, 0, 2281701376, 12912, 0, 2281701376, 12912, 0, 2281701376] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756439984570829407_326_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756439984570829407_326_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ced6c1a6 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756439984570829407_326_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,269 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 46))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 22))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 42)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 62))) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((194 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((248 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + break; + } + } + if ((counter4 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 270 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 1024, 286331153, 286331153, 2176, 17, 268435456, 2176, 17, 268435456, 2176, 17, 268435456, 3216, 69905, 286326784, 3216, 69905, 286326784, 3216, 69905, 286326784, 3216, 69905, 286326784, 3216, 69905, 286326784, 3216, 69905, 286326784, 3216, 69905, 286326784, 3216, 69905, 286326784, 3216, 69905, 286326784, 3232, 69905, 286326784, 3232, 69905, 286326784, 3232, 69905, 286326784, 3232, 69905, 286326784, 3232, 69905, 286326784, 3232, 69905, 286326784, 3232, 69905, 286326784, 3232, 69905, 286326784, 3232, 69905, 286326784, 10640, 0, 1024, 10656, 0, 1024, 12432, 9830, 1073741824, 12432, 9830, 1073741824, 12432, 9830, 1073741824, 12432, 9830, 1073741824, 12432, 9830, 1073741824, 12432, 9830, 
1073741824, 12432, 9830, 1073741824, 12432, 9830, 1073741824, 12436, 9830, 1073741824, 12436, 9830, 1073741824, 12436, 9830, 1073741824, 12436, 9830, 1073741824, 12436, 9830, 1073741824, 12436, 9830, 1073741824, 12436, 9830, 1073741824, 12436, 9830, 1073741824, 12448, 9830, 1073741824, 12448, 9830, 1073741824, 12448, 9830, 1073741824, 12448, 9830, 1073741824, 12448, 9830, 1073741824, 12448, 9830, 1073741824, 12448, 9830, 1073741824, 12448, 9830, 1073741824, 12452, 9830, 1073741824, 12452, 9830, 1073741824, 12452, 9830, 1073741824, 12452, 9830, 1073741824, 12452, 9830, 1073741824, 12452, 9830, 1073741824, 12452, 9830, 1073741824, 12452, 9830, 1073741824, 12864, 978670, 0, 12864, 978670, 0, 12864, 978670, 0, 12864, 978670, 0, 12864, 978670, 0, 12864, 978670, 0, 12864, 978670, 0, 12864, 978670, 0, 12864, 978670, 0, 12864, 978670, 0, 12864, 978670, 0, 12864, 978670, 0, 12864, 978670, 0, 12864, 978670, 0, 12864, 978670, 0, 13504, 85, 0, 13504, 85, 0, 13504, 85, 0, 13504, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756440015098982672_327_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756440015098982672_327_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..431f8d48 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756440015098982672_327_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,101 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 38)) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 46))) { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 40))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 
4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295, 3712, 4294967295, 4294967295] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756440041039039526_330_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756440041039039526_330_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4b65a091 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756440041039039526_330_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,278 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 26)) { + if ((WaveGetLaneIndex() >= 36)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 43)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 22)) { + if ((WaveGetLaneIndex() == 62)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 62))) { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 62))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = ((251 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((262 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 58))) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 354 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1808, 0, 1048576, 1824, 0, 1048576, 3536, 17, 0, 3536, 17, 0, 3540, 17, 0, 3540, 17, 0, 3544, 17, 0, 3544, 17, 0, 3552, 17, 0, 3552, 17, 0, 3556, 17, 0, 3556, 17, 0, 3560, 17, 0, 3560, 17, 0, 3984, 0, 286330880, 3984, 0, 286330880, 3984, 0, 286330880, 3984, 0, 
286330880, 3984, 0, 286330880, 3988, 0, 286330880, 3988, 0, 286330880, 3988, 0, 286330880, 3988, 0, 286330880, 3988, 0, 286330880, 3992, 0, 286330880, 3992, 0, 286330880, 3992, 0, 286330880, 3992, 0, 286330880, 3992, 0, 286330880, 4000, 0, 286330880, 4000, 0, 286330880, 4000, 0, 286330880, 4000, 0, 286330880, 4000, 0, 286330880, 4004, 0, 286330880, 4004, 0, 286330880, 4004, 0, 286330880, 4004, 0, 286330880, 4004, 0, 286330880, 4008, 0, 286330880, 4008, 0, 286330880, 4008, 0, 286330880, 4008, 0, 286330880, 4008, 0, 286330880, 4432, 273, 0, 4432, 273, 0, 4432, 273, 0, 4436, 273, 0, 4436, 273, 0, 4436, 273, 0, 4440, 273, 0, 4440, 273, 0, 4440, 273, 0, 4448, 273, 0, 4448, 273, 0, 4448, 273, 0, 4452, 273, 0, 4452, 273, 0, 4452, 273, 0, 4456, 273, 0, 4456, 273, 0, 4456, 273, 0, 5504, 1145324612, 1145324612, 5504, 1145324612, 1145324612, 5504, 1145324612, 1145324612, 5504, 1145324612, 1145324612, 5504, 1145324612, 1145324612, 5504, 1145324612, 1145324612, 5504, 1145324612, 1145324612, 5504, 1145324612, 1145324612, 5504, 1145324612, 1145324612, 5504, 1145324612, 1145324612, 5504, 1145324612, 1145324612, 5504, 1145324612, 1145324612, 5504, 1145324612, 1145324612, 5504, 1145324612, 1145324612, 5504, 1145324612, 1145324612, 5504, 1145324612, 1145324612, 14144, 268439552, 1090519296, 14144, 268439552, 1090519296, 14144, 268439552, 1090519296, 14144, 268439552, 1090519296, 14144, 268439552, 1090519296, 14160, 268439552, 1090519296, 14160, 268439552, 1090519296, 14160, 268439552, 1090519296, 14160, 268439552, 1090519296, 14160, 268439552, 1090519296, 16768, 5461, 357908480, 16768, 5461, 357908480, 16768, 5461, 357908480, 16768, 5461, 357908480, 16768, 5461, 357908480, 16768, 5461, 357908480, 16768, 5461, 357908480, 16768, 5461, 357908480, 16768, 5461, 357908480, 16768, 5461, 357908480, 16768, 5461, 357908480, 16768, 5461, 357908480, 16768, 5461, 357908480, 16768, 5461, 357908480, 16768, 5461, 357908480, 16784, 5461, 357908480, 16784, 5461, 357908480, 16784, 5461, 357908480, 
16784, 5461, 357908480, 16784, 5461, 357908480, 16784, 5461, 357908480, 16784, 5461, 357908480, 16784, 5461, 357908480, 16784, 5461, 357908480, 16784, 5461, 357908480, 16784, 5461, 357908480, 16784, 5461, 357908480, 16784, 5461, 357908480, 16784, 5461, 357908480, 16784, 5461, 357908480] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756440066946125657_331_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756440066946125657_331_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..13eb379d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756440066946125657_331_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,201 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if 
(((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 40)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 42)) { + if ((WaveGetLaneIndex() == 49)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 47)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 540 + - Name: expected_bit_patterns + 
Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1792, 2181570690, 545392672, 1792, 2181570690, 545392672, 1792, 2181570690, 545392672, 1792, 2181570690, 545392672, 1792, 2181570690, 545392672, 1792, 2181570690, 545392672, 1792, 2181570690, 545392672, 1792, 2181570690, 545392672, 1792, 2181570690, 545392672, 1792, 2181570690, 545392672, 1792, 2181570690, 545392672, 1808, 2181570690, 545392672, 1808, 2181570690, 545392672, 1808, 2181570690, 545392672, 1808, 2181570690, 545392672, 1808, 2181570690, 545392672, 1808, 2181570690, 545392672, 1808, 2181570690, 545392672, 1808, 2181570690, 545392672, 1808, 2181570690, 545392672, 1808, 2181570690, 545392672, 1808, 2181570690, 545392672, 1824, 2181570690, 545392672, 1824, 2181570690, 545392672, 1824, 2181570690, 545392672, 1824, 2181570690, 545392672, 1824, 2181570690, 545392672, 1824, 2181570690, 545392672, 1824, 2181570690, 545392672, 1824, 2181570690, 545392672, 1824, 2181570690, 545392672, 1824, 2181570690, 545392672, 1824, 2181570690, 545392672, 2752, 0, 613566720, 2752, 0, 613566720, 2752, 0, 613566720, 2752, 0, 613566720, 2752, 0, 613566720, 2752, 0, 613566720, 2752, 0, 613566720, 2752, 0, 613566720, 2756, 0, 613566720, 2756, 0, 613566720, 2756, 0, 613566720, 2756, 0, 613566720, 2756, 0, 613566720, 2756, 0, 613566720, 2756, 0, 613566720, 2756, 0, 613566720, 2760, 0, 613566720, 2760, 0, 613566720, 2760, 0, 613566720, 2760, 0, 613566720, 2760, 0, 613566720, 2760, 0, 613566720, 2760, 0, 613566720, 2760, 0, 613566720, 2768, 0, 613566720, 2768, 0, 613566720, 2768, 0, 613566720, 2768, 0, 613566720, 2768, 0, 613566720, 2768, 0, 613566720, 2768, 0, 613566720, 2768, 0, 613566720, 2772, 0, 613566720, 2772, 0, 613566720, 2772, 0, 613566720, 2772, 0, 613566720, 2772, 0, 613566720, 2772, 0, 613566720, 2772, 0, 613566720, 2772, 0, 613566720, 2776, 0, 613566720, 2776, 0, 613566720, 2776, 0, 613566720, 2776, 0, 613566720, 2776, 0, 613566720, 2776, 0, 613566720, 2776, 0, 613566720, 2776, 0, 613566720, 2784, 0, 
613566720, 2784, 0, 613566720, 2784, 0, 613566720, 2784, 0, 613566720, 2784, 0, 613566720, 2784, 0, 613566720, 2784, 0, 613566720, 2784, 0, 613566720, 2788, 0, 613566720, 2788, 0, 613566720, 2788, 0, 613566720, 2788, 0, 613566720, 2788, 0, 613566720, 2788, 0, 613566720, 2788, 0, 613566720, 2788, 0, 613566720, 2792, 0, 613566720, 2792, 0, 613566720, 2792, 0, 613566720, 2792, 0, 613566720, 2792, 0, 613566720, 2792, 0, 613566720, 2792, 0, 613566720, 2792, 0, 613566720, 3200, 9362, 0, 3200, 9362, 0, 3200, 9362, 0, 3200, 9362, 0, 3200, 9362, 0, 3204, 9362, 0, 3204, 9362, 0, 3204, 9362, 0, 3204, 9362, 0, 3204, 9362, 0, 3208, 9362, 0, 3208, 9362, 0, 3208, 9362, 0, 3208, 9362, 0, 3208, 9362, 0, 3216, 9362, 0, 3216, 9362, 0, 3216, 9362, 0, 3216, 9362, 0, 3216, 9362, 0, 3220, 9362, 0, 3220, 9362, 0, 3220, 9362, 0, 3220, 9362, 0, 3220, 9362, 0, 3224, 9362, 0, 3224, 9362, 0, 3224, 9362, 0, 3224, 9362, 0, 3224, 9362, 0, 3232, 9362, 0, 3232, 9362, 0, 3232, 9362, 0, 3232, 9362, 0, 3232, 9362, 0, 3236, 9362, 0, 3236, 9362, 0, 3236, 9362, 0, 3236, 9362, 0, 3236, 9362, 0, 3240, 9362, 0, 3240, 9362, 0, 3240, 9362, 0, 3240, 9362, 0, 3240, 9362, 0, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 4928, 613566756, 1227133513, 6224, 21, 0, 6224, 21, 0, 6224, 21, 0, 6240, 21, 0, 6240, 21, 0, 6240, 21, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + 
Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756440106501560214_332_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756440106501560214_332_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a7bce0fe --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756440106501560214_332_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,118 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 57))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (((58 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 18)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 165 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2000, 16384, 0, 2016, 16384, 0, 2032, 16384, 0, 3728, 16384, 0, 3732, 16384, 0, 3736, 16384, 0, 3744, 16384, 0, 3748, 16384, 0, 3752, 16384, 0, 3760, 16384, 0, 3764, 16384, 0, 3768, 16384, 0, 4160, 21, 0, 4160, 21, 0, 4160, 21, 0, 4608, 70997, 0, 4608, 70997, 0, 4608, 70997, 0, 4608, 70997, 0, 4608, 70997, 0, 4608, 70997, 0, 4608, 70997, 0, 4608, 70997, 0, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 
1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765, 5184, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756440157048339958_334_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756440157048339958_334_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1a59d711 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756440157048339958_334_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,329 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((124 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() < 21)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 60)) { + if ((WaveGetLaneIndex() == 42)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 49)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 63)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 18)) 
|| (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((289 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: 
UInt32 + Stride: 4 + Fill: 0 + Size: 423 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3780, 2097152, 0, 3784, 2097152, 0, 3796, 2097152, 0, 3800, 2097152, 0, 3812, 2097152, 0, 3816, 2097152, 0, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4368, 1090785345, 272696336, 4368, 1090785345, 272696336, 4368, 1090785345, 272696336, 4368, 1090785345, 272696336, 4368, 1090785345, 272696336, 4368, 1090785345, 272696336, 4368, 1090785345, 272696336, 4368, 1090785345, 272696336, 4368, 1090785345, 272696336, 4368, 1090785345, 272696336, 4368, 1090785345, 272696336, 4384, 1090785345, 272696336, 4384, 1090785345, 272696336, 4384, 1090785345, 272696336, 4384, 1090785345, 272696336, 4384, 1090785345, 272696336, 4384, 1090785345, 272696336, 4384, 1090785345, 272696336, 4384, 1090785345, 272696336, 4384, 1090785345, 272696336, 4384, 1090785345, 272696336, 4384, 1090785345, 272696336, 6480, 0, 131072, 6496, 0, 131072, 7952, 2, 0, 7956, 2, 0, 7968, 2, 0, 7972, 2, 0, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 9472, 613566756, 1227133513, 10112, 85, 0, 10112, 85, 0, 10112, 85, 0, 10112, 85, 0, 10752, 255, 0, 10752, 255, 0, 10752, 255, 0, 10752, 255, 0, 10752, 255, 0, 
10752, 255, 0, 10752, 255, 0, 10752, 255, 0, 11392, 85, 0, 11392, 85, 0, 11392, 85, 0, 11392, 85, 0, 15120, 9, 2185232384, 15120, 9, 2185232384, 15120, 9, 2185232384, 15120, 9, 2185232384, 15120, 9, 2185232384, 15136, 9, 2185232384, 15136, 9, 2185232384, 15136, 9, 2185232384, 15136, 9, 2185232384, 15136, 9, 2185232384, 17424, 262144, 0, 17440, 262144, 0, 18512, 1, 33554432, 18512, 1, 33554432, 18528, 1, 33554432, 18528, 1, 33554432, 19216, 1, 2185822208, 19216, 1, 2185822208, 19216, 1, 2185822208, 19216, 1, 2185822208, 19216, 1, 2185822208, 19216, 1, 2185822208, 19232, 1, 2185822208, 19232, 1, 2185822208, 19232, 1, 2185822208, 19232, 1, 2185822208, 19232, 1, 2185822208, 19232, 1, 2185822208, 19776, 272696336, 68174084, 19776, 272696336, 68174084, 19776, 272696336, 68174084, 19776, 272696336, 68174084, 19776, 272696336, 68174084, 19776, 272696336, 68174084, 19776, 272696336, 68174084, 19776, 272696336, 68174084, 19776, 272696336, 68174084, 19776, 272696336, 68174084, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513, 20096, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: 
_wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756440214662860285_336_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756440214662860285_336_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3beb2826 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756440214662860285_336_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,413 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 60)) { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if 
((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((169 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 20)) { + if ((WaveGetLaneIndex() >= 62)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (282 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((297 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((312 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 45)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((319 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 1)) { + continue; + } + if ((i6 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((332 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1434 
+ - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2432, 0, 268435456, 3472, 572662306, 572662306, 3472, 572662306, 572662306, 3472, 572662306, 572662306, 3472, 572662306, 572662306, 3472, 572662306, 572662306, 3472, 572662306, 572662306, 3472, 572662306, 572662306, 3472, 572662306, 572662306, 3472, 572662306, 572662306, 3472, 572662306, 572662306, 3472, 572662306, 572662306, 3472, 572662306, 572662306, 3472, 572662306, 572662306, 3472, 572662306, 572662306, 3472, 572662306, 572662306, 3472, 572662306, 572662306, 3488, 572662306, 572662306, 3488, 572662306, 572662306, 3488, 572662306, 572662306, 3488, 572662306, 572662306, 3488, 572662306, 572662306, 3488, 572662306, 572662306, 3488, 572662306, 572662306, 3488, 572662306, 572662306, 3488, 572662306, 572662306, 3488, 572662306, 572662306, 3488, 572662306, 572662306, 3488, 572662306, 572662306, 3488, 572662306, 572662306, 3488, 572662306, 572662306, 3488, 572662306, 572662306, 3488, 572662306, 572662306, 3504, 572662306, 572662306, 3504, 572662306, 572662306, 3504, 572662306, 572662306, 3504, 572662306, 572662306, 3504, 572662306, 572662306, 3504, 572662306, 572662306, 3504, 572662306, 572662306, 3504, 572662306, 572662306, 3504, 572662306, 572662306, 3504, 572662306, 572662306, 3504, 572662306, 572662306, 3504, 572662306, 572662306, 3504, 572662306, 572662306, 3504, 572662306, 572662306, 3504, 572662306, 572662306, 3504, 572662306, 572662306, 4500, 286331153, 286331153, 4500, 286331153, 286331153, 4500, 286331153, 286331153, 4500, 286331153, 286331153, 4500, 286331153, 286331153, 4500, 286331153, 286331153, 4500, 286331153, 286331153, 4500, 286331153, 286331153, 4500, 286331153, 286331153, 4500, 286331153, 286331153, 4500, 286331153, 286331153, 4500, 286331153, 286331153, 4500, 286331153, 286331153, 4500, 286331153, 286331153, 4500, 286331153, 286331153, 4500, 286331153, 286331153, 4504, 286331153, 286331153, 4504, 286331153, 286331153, 4504, 286331153, 286331153, 4504, 286331153, 286331153, 4504, 
286331153, 286331153, 4504, 286331153, 286331153, 4504, 286331153, 286331153, 4504, 286331153, 286331153, 4504, 286331153, 286331153, 4504, 286331153, 286331153, 4504, 286331153, 286331153, 4504, 286331153, 286331153, 4504, 286331153, 286331153, 4504, 286331153, 286331153, 4504, 286331153, 286331153, 4504, 286331153, 286331153, 4516, 286331153, 286331153, 4516, 286331153, 286331153, 4516, 286331153, 286331153, 4516, 286331153, 286331153, 4516, 286331153, 286331153, 4516, 286331153, 286331153, 4516, 286331153, 286331153, 4516, 286331153, 286331153, 4516, 286331153, 286331153, 4516, 286331153, 286331153, 4516, 286331153, 286331153, 4516, 286331153, 286331153, 4516, 286331153, 286331153, 4516, 286331153, 286331153, 4516, 286331153, 286331153, 4516, 286331153, 286331153, 4520, 286331153, 286331153, 4520, 286331153, 286331153, 4520, 286331153, 286331153, 4520, 286331153, 286331153, 4520, 286331153, 286331153, 4520, 286331153, 286331153, 4520, 286331153, 286331153, 4520, 286331153, 286331153, 4520, 286331153, 286331153, 4520, 286331153, 286331153, 4520, 286331153, 286331153, 4520, 286331153, 286331153, 4520, 286331153, 286331153, 4520, 286331153, 286331153, 4520, 286331153, 286331153, 4520, 286331153, 286331153, 4532, 286331153, 286331153, 4532, 286331153, 286331153, 4532, 286331153, 286331153, 4532, 286331153, 286331153, 4532, 286331153, 286331153, 4532, 286331153, 286331153, 4532, 286331153, 286331153, 4532, 286331153, 286331153, 4532, 286331153, 286331153, 4532, 286331153, 286331153, 4532, 286331153, 286331153, 4532, 286331153, 286331153, 4532, 286331153, 286331153, 4532, 286331153, 286331153, 4532, 286331153, 286331153, 4532, 286331153, 286331153, 4536, 286331153, 286331153, 4536, 286331153, 286331153, 4536, 286331153, 286331153, 4536, 286331153, 286331153, 4536, 286331153, 286331153, 4536, 286331153, 286331153, 4536, 286331153, 286331153, 4536, 286331153, 286331153, 4536, 286331153, 286331153, 4536, 286331153, 286331153, 4536, 286331153, 286331153, 4536, 286331153, 
286331153, 4536, 286331153, 286331153, 4536, 286331153, 286331153, 4536, 286331153, 286331153, 4536, 286331153, 286331153, 5076, 572662306, 572662306, 5076, 572662306, 572662306, 5076, 572662306, 572662306, 5076, 572662306, 572662306, 5076, 572662306, 572662306, 5076, 572662306, 572662306, 5076, 572662306, 572662306, 5076, 572662306, 572662306, 5076, 572662306, 572662306, 5076, 572662306, 572662306, 5076, 572662306, 572662306, 5076, 572662306, 572662306, 5076, 572662306, 572662306, 5076, 572662306, 572662306, 5076, 572662306, 572662306, 5076, 572662306, 572662306, 5080, 572662306, 572662306, 5080, 572662306, 572662306, 5080, 572662306, 572662306, 5080, 572662306, 572662306, 5080, 572662306, 572662306, 5080, 572662306, 572662306, 5080, 572662306, 572662306, 5080, 572662306, 572662306, 5080, 572662306, 572662306, 5080, 572662306, 572662306, 5080, 572662306, 572662306, 5080, 572662306, 572662306, 5080, 572662306, 572662306, 5080, 572662306, 572662306, 5080, 572662306, 572662306, 5080, 572662306, 572662306, 5092, 572662306, 572662306, 5092, 572662306, 572662306, 5092, 572662306, 572662306, 5092, 572662306, 572662306, 5092, 572662306, 572662306, 5092, 572662306, 572662306, 5092, 572662306, 572662306, 5092, 572662306, 572662306, 5092, 572662306, 572662306, 5092, 572662306, 572662306, 5092, 572662306, 572662306, 5092, 572662306, 572662306, 5092, 572662306, 572662306, 5092, 572662306, 572662306, 5092, 572662306, 572662306, 5092, 572662306, 572662306, 5096, 572662306, 572662306, 5096, 572662306, 572662306, 5096, 572662306, 572662306, 5096, 572662306, 572662306, 5096, 572662306, 572662306, 5096, 572662306, 572662306, 5096, 572662306, 572662306, 5096, 572662306, 572662306, 5096, 572662306, 572662306, 5096, 572662306, 572662306, 5096, 572662306, 572662306, 5096, 572662306, 572662306, 5096, 572662306, 572662306, 5096, 572662306, 572662306, 5096, 572662306, 572662306, 5096, 572662306, 572662306, 5108, 572662306, 572662306, 5108, 572662306, 572662306, 5108, 572662306, 572662306, 
5108, 572662306, 572662306, 5108, 572662306, 572662306, 5108, 572662306, 572662306, 5108, 572662306, 572662306, 5108, 572662306, 572662306, 5108, 572662306, 572662306, 5108, 572662306, 572662306, 5108, 572662306, 572662306, 5108, 572662306, 572662306, 5108, 572662306, 572662306, 5108, 572662306, 572662306, 5108, 572662306, 572662306, 5108, 572662306, 572662306, 5112, 572662306, 572662306, 5112, 572662306, 572662306, 5112, 572662306, 572662306, 5112, 572662306, 572662306, 5112, 572662306, 572662306, 5112, 572662306, 572662306, 5112, 572662306, 572662306, 5112, 572662306, 572662306, 5112, 572662306, 572662306, 5112, 572662306, 572662306, 5112, 572662306, 572662306, 5112, 572662306, 572662306, 5112, 572662306, 572662306, 5112, 572662306, 572662306, 5112, 572662306, 572662306, 5112, 572662306, 572662306, 5648, 572662306, 572662306, 5648, 572662306, 572662306, 5648, 572662306, 572662306, 5648, 572662306, 572662306, 5648, 572662306, 572662306, 5648, 572662306, 572662306, 5648, 572662306, 572662306, 5648, 572662306, 572662306, 5648, 572662306, 572662306, 5648, 572662306, 572662306, 5648, 572662306, 572662306, 5648, 572662306, 572662306, 5648, 572662306, 572662306, 5648, 572662306, 572662306, 5648, 572662306, 572662306, 5648, 572662306, 572662306, 5664, 572662306, 572662306, 5664, 572662306, 572662306, 5664, 572662306, 572662306, 5664, 572662306, 572662306, 5664, 572662306, 572662306, 5664, 572662306, 572662306, 5664, 572662306, 572662306, 5664, 572662306, 572662306, 5664, 572662306, 572662306, 5664, 572662306, 572662306, 5664, 572662306, 572662306, 5664, 572662306, 572662306, 5664, 572662306, 572662306, 5664, 572662306, 572662306, 5664, 572662306, 572662306, 5664, 572662306, 572662306, 5680, 572662306, 572662306, 5680, 572662306, 572662306, 5680, 572662306, 572662306, 5680, 572662306, 572662306, 5680, 572662306, 572662306, 5680, 572662306, 572662306, 5680, 572662306, 572662306, 5680, 572662306, 572662306, 5680, 572662306, 572662306, 5680, 572662306, 572662306, 5680, 
572662306, 572662306, 5680, 572662306, 572662306, 5680, 572662306, 572662306, 5680, 572662306, 572662306, 5680, 572662306, 572662306, 5680, 572662306, 572662306, 5952, 1145324612, 1145324612, 5952, 1145324612, 1145324612, 5952, 1145324612, 1145324612, 5952, 1145324612, 1145324612, 5952, 1145324612, 1145324612, 5952, 1145324612, 1145324612, 5952, 1145324612, 1145324612, 5952, 1145324612, 1145324612, 5952, 1145324612, 1145324612, 5952, 1145324612, 1145324612, 5952, 1145324612, 1145324612, 5952, 1145324612, 1145324612, 5952, 1145324612, 1145324612, 5952, 1145324612, 1145324612, 5952, 1145324612, 1145324612, 5952, 1145324612, 1145324612, 6400, 559240, 0, 6400, 559240, 0, 6400, 559240, 0, 6400, 559240, 0, 6400, 559240, 0, 7296, 73, 0, 7296, 73, 0, 7296, 73, 0, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 7872, 1363481681, 340870420, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 
4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 8192, 4294967295, 4294967295, 9424, 286331153, 286331153, 9424, 286331153, 286331153, 9424, 286331153, 286331153, 9424, 286331153, 286331153, 9424, 286331153, 286331153, 9424, 286331153, 286331153, 9424, 286331153, 286331153, 9424, 286331153, 286331153, 9424, 286331153, 286331153, 9424, 286331153, 286331153, 9424, 286331153, 286331153, 9424, 286331153, 286331153, 9424, 286331153, 286331153, 9424, 286331153, 286331153, 9424, 286331153, 286331153, 9424, 286331153, 286331153, 9440, 286331153, 286331153, 9440, 286331153, 286331153, 9440, 286331153, 286331153, 9440, 286331153, 286331153, 9440, 286331153, 286331153, 9440, 286331153, 286331153, 9440, 286331153, 286331153, 9440, 286331153, 286331153, 9440, 286331153, 
286331153, 9440, 286331153, 286331153, 9440, 286331153, 286331153, 9440, 286331153, 286331153, 9440, 286331153, 286331153, 9440, 286331153, 286331153, 9440, 286331153, 286331153, 9440, 286331153, 286331153, 13568, 279620, 0, 13568, 279620, 0, 13568, 279620, 0, 13568, 279620, 0, 13568, 279620, 0, 16576, 0, 1145307136, 16576, 0, 1145307136, 16576, 0, 1145307136, 16576, 0, 1145307136, 16592, 0, 1145307136, 16592, 0, 1145307136, 16592, 0, 1145307136, 16592, 0, 1145307136, 16608, 0, 1145307136, 16608, 0, 1145307136, 16608, 0, 1145307136, 16608, 0, 1145307136, 18048, 0, 1024, 20416, 0, 2290647040, 20416, 0, 2290647040, 20416, 0, 2290647040, 20416, 0, 2290647040, 20416, 0, 2290647040, 20420, 0, 2290647040, 20420, 0, 2290647040, 20420, 0, 2290647040, 20420, 0, 2290647040, 20420, 0, 2290647040, 20424, 0, 2290647040, 20424, 0, 2290647040, 20424, 0, 2290647040, 20424, 0, 2290647040, 20424, 0, 2290647040, 20432, 0, 2290647040, 20432, 0, 2290647040, 20432, 0, 2290647040, 20432, 0, 2290647040, 20432, 0, 2290647040, 20436, 0, 2290647040, 20436, 0, 2290647040, 20436, 0, 2290647040, 20436, 0, 2290647040, 20436, 0, 2290647040, 20440, 0, 2290647040, 20440, 0, 2290647040, 20440, 0, 2290647040, 20440, 0, 2290647040, 20440, 0, 2290647040] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize64BitTracking/tests/program_1756440310981787780_338_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756440310981787780_338_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..ec3e00cf
--- /dev/null
+++ b/test/WaveSize64BitTracking/tests/program_1756440310981787780_338_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,70 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Output log: each record is 3 uints (tag, ballot.x, ballot.y).
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the shared append cursor into _participant_bit.
+
+[numthreads(64, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // Every lane that executes a wave op appends one record naming the op and its active lanes.
+  uint result = 0;
+  uint counter0 = 0;
+  while ((counter0 < 3)) {
+    counter0 = (counter0 + 1); // Incremented before use, so the tags below carry 1, 2, 3.
+    if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 61))) {
+      result = (result + WaveActiveMin(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve 3 slots; temp receives the cursor value from before the add.
+      _participant_bit[temp] = ((21 << 6) | (counter0 << 4)); // Tag: op id 21 in bits 6 and up, loop counter at bit 4.
+      uint4 ballot = WaveActiveBallot(1); // Mask of the lanes executing this branch together.
+      _participant_bit[(temp + 1)] = ballot.x; // Lanes 0-31.
+      _participant_bit[(temp + 2)] = ballot.y; // Lanes 32-63.
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 64 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 27
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1360, 2, 536870928, 1360, 2, 536870928, 1360, 2, 536870928, 1376, 2, 536870928, 1376, 2, 536870928, 1376, 2, 536870928, 1392, 2, 536870928, 1392, 2, 536870928, 1392, 2, 536870928]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3 # One record = tag word + two ballot words
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+      - Name: _participant_bit
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 0
+          Space: 0
+        VulkanBinding:
+          Binding: 0
+      - Name: _wave_op_index
+        Kind:
RWStructuredBuffer
+        DirectXBinding:
+          Register: 1
+          Space: 0
+        VulkanBinding:
+          Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize64BitTracking/tests/program_1756440311692117435_339_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756440311692117435_339_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..5bb5fbdf
--- /dev/null
+++ b/test/WaveSize64BitTracking/tests/program_1756440311692117435_339_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,168 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Output log: each record is 3 uints (tag, ballot.x, ballot.y).
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the shared append cursor into _participant_bit.
+
+[numthreads(64, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // Every lane that executes a wave op appends one record naming the op and its active lanes.
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 3)) {
+    case 0: {
+      if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 50))) {
+        if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 58))) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve 3 slots; temp receives the cursor value from before the add.
+          _participant_bit[temp] = (20 << 6); // Tag: op id 20 in bits 6 and up; no loop counter outside a loop.
+          uint4 ballot = WaveActiveBallot(1); // Mask of the lanes executing this branch together.
+          _participant_bit[(temp + 1)] = ballot.x; // Lanes 0-31.
+          _participant_bit[(temp + 2)] = ballot.y; // Lanes 32-63.
+        }
+        switch ((WaveGetLaneIndex() % 3)) { // Same selector as the enclosing case 0, so only case 0 is taken here.
+          case 0: {
+            if ((WaveGetLaneIndex() < 8)) {
+              result = (result + WaveActiveSum(1));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (30 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+          case 1: {
+            if (((WaveGetLaneIndex() % 2) == 0)) {
+              result = (result + WaveActiveSum(2));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (39 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+          case 2: {
+            if (true) {
+              result = (result + WaveActiveSum(3));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (44 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+        }
+        if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 43))) {
+          result = (result + WaveActiveMax((WaveGetLaneIndex() + 3)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (57 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    case 1: {
+      uint counter0 = 0;
+      while ((counter0 < 3)) {
+        counter0 = (counter0 + 1); // Incremented before use, so the tags below carry 1, 2, 3.
+        if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 20))) {
+          result = (result + WaveActiveMin((WaveGetLaneIndex() + 3)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); // Tag: op id in bits 6 and up, loop counter at bit 4.
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((WaveGetLaneIndex() < 24)) {
+          if ((WaveGetLaneIndex() < 21)) {
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((91 << 6) | (counter0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 48))) {
+          result = (result + WaveActiveMin(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((106 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    case 2: {
+      if (true) {
+        result = (result + WaveActiveSum(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (111 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 64 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 198
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1280, 4681, 2415919104, 1280, 4681, 2415919104, 1280, 4681, 2415919104, 1280, 4681, 2415919104, 1280, 4681, 2415919104, 1280, 4681, 2415919104, 1280, 4681, 2415919104, 1920, 73, 0, 1920, 73, 0, 1920, 73, 0, 3648, 73, 2454192128, 3648, 73, 2454192128, 3648, 73, 2454192128, 3648, 73, 2454192128, 3648, 73, 2454192128, 3648, 73, 2454192128, 3648, 73, 2454192128, 3648, 73, 2454192128, 5200, 16, 0, 5216, 16, 0, 5232, 16, 0, 5840, 599186, 0, 5840, 599186, 0, 5840, 599186, 0, 5840, 599186, 0, 5840, 599186, 0, 5840, 599186, 0, 5840, 599186, 0, 5856, 599186, 0, 5856, 599186, 0, 5856, 599186, 0, 5856, 599186, 0, 5856, 599186, 0, 5856, 599186, 0, 5856, 599186, 0, 5872, 599186, 0, 5872, 599186, 0, 5872, 599186, 0, 5872, 599186, 0, 5872, 599186, 0, 5872, 599186, 0, 5872, 599186, 0, 6800, 16, 0, 6816, 16, 0, 6832, 16, 0, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104,
613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+      - Name: _participant_bit
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 0
+          Space: 0
+        VulkanBinding:
+          Binding: 0
+      - Name: _wave_op_index
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 1
+          Space: 0
+        VulkanBinding:
+          Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize64BitTracking/tests/program_1756440455537171712_343_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756440455537171712_343_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..117e4936
--- /dev/null
+++ b/test/WaveSize64BitTracking/tests/program_1756440455537171712_343_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,267 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // Output log: each record is 3 uints (tag, ballot.x, ballot.y).
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // Element 0 is the shared append cursor into _participant_bit.
+
+[numthreads(64, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // Every lane that executes a wave op appends one record naming the op and its active lanes.
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 4)) {
+    case 0: {
+      if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 42))) { // Lanes here are multiples of 4, so this never passes and the else branch runs.
+        if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 5))) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp); // Reserve 3 slots; temp receives the cursor value from before the add.
+          _participant_bit[temp] = (36 << 6); // Tag: op id 36 in bits 6 and up; no loop counter outside a loop.
+          uint4 ballot = WaveActiveBallot(1); // Mask of the lanes executing this branch together.
+          _participant_bit[(temp + 1)] = ballot.x; // Lanes 0-31.
+          _participant_bit[(temp + 2)] = ballot.y; // Lanes 32-63.
+        }
+        switch ((WaveGetLaneIndex() % 2)) {
+          case 0: {
+            if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 25))) {
+              if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 33))) {
+                result = (result + WaveActiveMax(WaveGetLaneIndex()));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (81 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+            } else {
+              if (((WaveGetLaneIndex() & 1) == 1)) {
+                result = (result + WaveActiveSum(result));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (90 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              if (((WaveGetLaneIndex() & 1) == 0)) {
+                result = (result + WaveActiveMin(result));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (99 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+            }
+            break;
+          }
+          case 1: {
+            if (((WaveGetLaneIndex() % 2) == 0)) {
+              result = (result + WaveActiveSum(2));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (108 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+            break;
+          }
+          default: {
+            result = (result + WaveActiveSum(99));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (112 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+            break;
+          }
+        }
+        if ((((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 25))) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (127 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      } else {
+        uint counter0 = 0;
+        while ((counter0 < 2)) {
+          counter0 = (counter0 + 1);
+          if ((WaveGetLaneIndex() == 6)) {
+            if ((WaveGetLaneIndex() == 55)) {
+              result = (result + WaveActiveSum(8));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = ((144 << 6) | (counter0 << 4)); // Tag: op id in bits 6 and up, loop counter at bit 4.
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          } else {
+            if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 53))) {
+              result = (result + WaveActiveSum(result));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = ((155 << 6) | (counter0 << 4));
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+            }
+          }
+          if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 17))) {
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((178 << 6) | (counter0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((counter0 == 1)) { // counter0 is 1 on the first pass, so the loop body runs once.
+            break;
+          }
+        }
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) { // lane % 4 == 1 here, i.e. odd, so this never passes.
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (190 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 2: {
+      if (true) {
+        result = (result + WaveActiveSum(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (195 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 3: {
+      uint counter1 = 0;
+      while ((counter1 < 2)) {
+        counter1 = (counter1 + 1);
+        if ((WaveGetLaneIndex() < 28)) {
+          switch ((WaveGetLaneIndex() % 2)) { // lane % 4 == 3 here, i.e. odd, so only case 1 is taken.
+            case 0: {
+              if ((WaveGetLaneIndex() < 8)) {
+                result = (result + WaveActiveSum(1));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = ((215 << 6) | (counter1 << 4));
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+            case 1: {
+              if (((WaveGetLaneIndex() % 2) == 0)) {
+                result = (result + WaveActiveSum(2));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = ((224 << 6) | (counter1 << 4));
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+          }
+          if ((WaveGetLaneIndex() < 29)) {
+            result = (result + WaveActiveMax(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((231 << 6) | (counter1 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((WaveGetLaneIndex() == 41)) {
+          result = (result + WaveActiveMin(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((238 << 6) | (counter1 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((counter1 == 1)) { // counter1 is 1 on the first pass, so the loop body runs once.
+          break;
+        }
+      }
+      break;
+    }
+    default: { // lane % 4 is always 0..3 and all four are handled above.
+      result = (result + WaveActiveSum(99));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (245 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 64 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 75
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [9936, 256, 0, 11408, 0, 4096, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 14800, 143165576, 0, 14800, 143165576, 0, 14800, 143165576, 0, 14800, 143165576, 0, 14800, 143165576, 0, 14800, 143165576, 0, 14800, 143165576, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3 # One record = tag word + two ballot words
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+      - Name: _participant_bit
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 0
+          Space: 0
+        VulkanBinding:
+          Binding: 0
+      - Name: _wave_op_index
+        Kind: RWStructuredBuffer
+        DirectXBinding:
+          Register: 1
+          Space: 0
+        VulkanBinding:
+          Binding: 1
+...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756440470774015241_345_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756440470774015241_345_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..aa5ab03e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756440470774015241_345_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,489 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 57))) { + if (((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 53))) { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 46))) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 62))) { + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((166 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((i2 == 2)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 55))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i1 == 2)) { + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 54))) { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 55)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 46)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (325 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (342 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (367 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 41)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (374 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 41))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((393 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (398 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (403 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (417 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 49))) { + if 
(((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (447 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 52)) { + if ((WaveGetLaneIndex() >= 52)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (457 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (464 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (475 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (484 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (488 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((502 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 36)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((511 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 687 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7232, 2181570690, 545392672, 7232, 2181570690, 545392672, 7232, 2181570690, 545392672, 7232, 2181570690, 545392672, 7232, 2181570690, 545392672, 7232, 2181570690, 545392672, 7232, 2181570690, 545392672, 7232, 2181570690, 545392672, 7232, 2181570690, 545392672, 7232, 2181570690, 545392672, 7232, 2181570690, 545392672, 7248, 2181570690, 545392672, 7248, 2181570690, 545392672, 7248, 2181570690, 545392672, 7248, 2181570690, 545392672, 7248, 2181570690, 545392672, 7248, 2181570690, 545392672, 7248, 2181570690, 545392672, 7248, 2181570690, 545392672, 7248, 2181570690, 545392672, 7248, 2181570690, 545392672, 7248, 2181570690, 545392672, 7264, 2181570690, 545392672, 7264, 2181570690, 545392672, 7264, 2181570690, 545392672, 7264, 2181570690, 545392672, 7264, 2181570690, 545392672, 7264, 2181570690, 545392672, 7264, 2181570690, 545392672, 7264, 2181570690, 545392672, 7264, 2181570690, 
545392672, 7264, 2181570690, 545392672, 7264, 2181570690, 545392672, 11392, 272696336, 68174084, 11392, 272696336, 68174084, 11392, 272696336, 68174084, 11392, 272696336, 68174084, 11392, 272696336, 68174084, 11392, 272696336, 68174084, 11392, 272696336, 68174084, 11392, 272696336, 68174084, 11392, 272696336, 68174084, 11392, 272696336, 68174084, 11408, 272696336, 68174084, 11408, 272696336, 68174084, 11408, 272696336, 68174084, 11408, 272696336, 68174084, 11408, 272696336, 68174084, 11408, 272696336, 68174084, 11408, 272696336, 68174084, 11408, 272696336, 68174084, 11408, 272696336, 68174084, 11408, 272696336, 68174084, 11424, 272696336, 68174084, 11424, 272696336, 68174084, 11424, 272696336, 68174084, 11424, 272696336, 68174084, 11424, 272696336, 68174084, 11424, 272696336, 68174084, 11424, 272696336, 68174084, 11424, 272696336, 68174084, 11424, 272696336, 68174084, 11424, 272696336, 68174084, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 17856, 613566756, 1227133513, 18496, 17, 0, 18496, 17, 0, 25472, 537002016, 2097664, 25472, 537002016, 2097664, 25472, 537002016, 2097664, 25472, 537002016, 2097664, 25472, 537002016, 2097664, 25792, 1145324612, 1145324612, 25792, 1145324612, 1145324612, 25792, 1145324612, 1145324612, 25792, 1145324612, 1145324612, 25792, 1145324612, 1145324612, 25792, 1145324612, 1145324612, 25792, 1145324612, 1145324612, 25792, 1145324612, 1145324612, 25792, 1145324612, 1145324612, 25792, 1145324612, 
1145324612, 25792, 1145324612, 1145324612, 25792, 1145324612, 1145324612, 25792, 1145324612, 1145324612, 25792, 1145324612, 1145324612, 25792, 1145324612, 1145324612, 25792, 1145324612, 1145324612, 26688, 2290649224, 2290649224, 26688, 2290649224, 2290649224, 26688, 2290649224, 2290649224, 26688, 2290649224, 2290649224, 26688, 2290649224, 2290649224, 26688, 2290649224, 2290649224, 26688, 2290649224, 2290649224, 26688, 2290649224, 2290649224, 26688, 2290649224, 2290649224, 26688, 2290649224, 2290649224, 26688, 2290649224, 2290649224, 26688, 2290649224, 2290649224, 26688, 2290649224, 2290649224, 26688, 2290649224, 2290649224, 26688, 2290649224, 2290649224, 26688, 2290649224, 2290649224, 30976, 2290649224, 2290649224, 30976, 2290649224, 2290649224, 30976, 2290649224, 2290649224, 30976, 2290649224, 2290649224, 30976, 2290649224, 2290649224, 30976, 2290649224, 2290649224, 30976, 2290649224, 2290649224, 30976, 2290649224, 2290649224, 30976, 2290649224, 2290649224, 30976, 2290649224, 2290649224, 30976, 2290649224, 2290649224, 30976, 2290649224, 2290649224, 30976, 2290649224, 2290649224, 30976, 2290649224, 2290649224, 30976, 2290649224, 2290649224, 30976, 2290649224, 2290649224, 32144, 3, 0, 32144, 3, 0, 32160, 3, 0, 32160, 3, 0, 32176, 3, 0, 32176, 3, 0, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32720, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 
32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32736, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280, 32752, 0, 4294967280] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756440543840881938_346_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756440543840881938_346_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..50ef012c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756440543840881938_346_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,423 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() >= 60)) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 23))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 23)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 29)) { + if ((WaveGetLaneIndex() == 34)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((310 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((328 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((339 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + } + if ((i3 == 1)) { + break; + } + } + break; + } + case 3: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() >= 48)) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((360 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() >= 46)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((378 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 48)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((385 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 40)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((392 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + uint counter7 = 0; + while ((counter7 < 3)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((408 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter8 = 0; + while ((counter8 < 3)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((426 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 42))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((444 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((455 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((464 << 6) | (counter7 << 4)) | (counter8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter7 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 2208 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1488, 1, 268500992, 1488, 1, 268500992, 1488, 1, 268500992, 1504, 1, 268500992, 1504, 1, 268500992, 1504, 1, 268500992, 1520, 1, 268500992, 1520, 1, 268500992, 1520, 1, 268500992, 2704, 1, 
268500992, 2704, 1, 268500992, 2704, 1, 268500992, 2708, 1, 268500992, 2708, 1, 268500992, 2708, 1, 268500992, 2720, 1, 268500992, 2720, 1, 268500992, 2720, 1, 268500992, 2724, 1, 268500992, 2724, 1, 268500992, 2724, 1, 268500992, 2736, 1, 268500992, 2736, 1, 268500992, 2736, 1, 268500992, 2740, 1, 268500992, 2740, 1, 268500992, 2740, 1, 268500992, 3408, 4097, 268435456, 3408, 4097, 268435456, 3408, 4097, 268435456, 3412, 4097, 268435456, 3412, 4097, 268435456, 3412, 4097, 268435456, 3424, 4097, 268435456, 3424, 4097, 268435456, 3424, 4097, 268435456, 3428, 4097, 268435456, 3428, 4097, 268435456, 3428, 4097, 268435456, 3440, 4097, 268435456, 3440, 4097, 268435456, 3440, 4097, 268435456, 3444, 4097, 268435456, 3444, 4097, 268435456, 3444, 4097, 268435456, 4112, 1, 268435456, 4112, 1, 268435456, 4128, 1, 268435456, 4128, 1, 268435456, 4144, 1, 268435456, 4144, 1, 268435456, 4672, 268501008, 1048832, 4672, 268501008, 1048832, 4672, 268501008, 1048832, 4672, 268501008, 1048832, 4672, 268501008, 1048832, 6208, 1048832, 16781313, 6208, 1048832, 16781313, 6208, 1048832, 16781313, 6208, 1048832, 16781313, 6208, 1048832, 16781313, 12432, 2, 0, 12448, 2, 0, 12464, 2, 0, 15296, 2, 0, 16640, 32, 33554944, 16640, 32, 33554944, 16640, 32, 33554944, 19200, 262144, 0, 19216, 262144, 0, 23040, 0, 2290614272, 23040, 0, 2290614272, 23040, 0, 2290614272, 23040, 0, 2290614272, 23056, 0, 2290614272, 23056, 0, 2290614272, 23056, 0, 2290614272, 23056, 0, 2290614272, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 
26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26128, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 26144, 2863311530, 2863311530, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 
1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27284, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27288, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 
1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27292, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27300, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 
27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27304, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 27308, 1431655765, 1431655765, 28436, 127, 4290772992, 28436, 127, 4290772992, 28436, 127, 4290772992, 28436, 127, 4290772992, 28436, 127, 4290772992, 28436, 127, 4290772992, 28436, 127, 4290772992, 28436, 127, 4290772992, 28436, 127, 4290772992, 28436, 127, 4290772992, 28436, 127, 4290772992, 28436, 127, 4290772992, 28436, 127, 4290772992, 28436, 127, 4290772992, 28436, 127, 4290772992, 28436, 127, 4290772992, 28436, 127, 4290772992, 28440, 127, 4290772992, 28440, 127, 4290772992, 28440, 127, 4290772992, 28440, 127, 4290772992, 28440, 127, 4290772992, 28440, 127, 4290772992, 28440, 127, 
4290772992, 28440, 127, 4290772992, 28440, 127, 4290772992, 28440, 127, 4290772992, 28440, 127, 4290772992, 28440, 127, 4290772992, 28440, 127, 4290772992, 28440, 127, 4290772992, 28440, 127, 4290772992, 28440, 127, 4290772992, 28440, 127, 4290772992, 28444, 127, 4290772992, 28444, 127, 4290772992, 28444, 127, 4290772992, 28444, 127, 4290772992, 28444, 127, 4290772992, 28444, 127, 4290772992, 28444, 127, 4290772992, 28444, 127, 4290772992, 28444, 127, 4290772992, 28444, 127, 4290772992, 28444, 127, 4290772992, 28444, 127, 4290772992, 28444, 127, 4290772992, 28444, 127, 4290772992, 28444, 127, 4290772992, 28444, 127, 4290772992, 28444, 127, 4290772992, 28452, 127, 4290772992, 28452, 127, 4290772992, 28452, 127, 4290772992, 28452, 127, 4290772992, 28452, 127, 4290772992, 28452, 127, 4290772992, 28452, 127, 4290772992, 28452, 127, 4290772992, 28452, 127, 4290772992, 28452, 127, 4290772992, 28452, 127, 4290772992, 28452, 127, 4290772992, 28452, 127, 4290772992, 28452, 127, 4290772992, 28452, 127, 4290772992, 28452, 127, 4290772992, 28452, 127, 4290772992, 28456, 127, 4290772992, 28456, 127, 4290772992, 28456, 127, 4290772992, 28456, 127, 4290772992, 28456, 127, 4290772992, 28456, 127, 4290772992, 28456, 127, 4290772992, 28456, 127, 4290772992, 28456, 127, 4290772992, 28456, 127, 4290772992, 28456, 127, 4290772992, 28456, 127, 4290772992, 28456, 127, 4290772992, 28456, 127, 4290772992, 28456, 127, 4290772992, 28456, 127, 4290772992, 28456, 127, 4290772992, 28460, 127, 4290772992, 28460, 127, 4290772992, 28460, 127, 4290772992, 28460, 127, 4290772992, 28460, 127, 4290772992, 28460, 127, 4290772992, 28460, 127, 4290772992, 28460, 127, 4290772992, 28460, 127, 4290772992, 28460, 127, 4290772992, 28460, 127, 4290772992, 28460, 127, 4290772992, 28460, 127, 4290772992, 28460, 127, 4290772992, 28460, 127, 4290772992, 28460, 127, 4290772992, 28460, 127, 4290772992, 29140, 3, 4294901760, 29140, 3, 4294901760, 29140, 3, 4294901760, 29140, 3, 4294901760, 29140, 3, 4294901760, 
29140, 3, 4294901760, 29140, 3, 4294901760, 29140, 3, 4294901760, 29140, 3, 4294901760, 29140, 3, 4294901760, 29140, 3, 4294901760, 29140, 3, 4294901760, 29140, 3, 4294901760, 29140, 3, 4294901760, 29140, 3, 4294901760, 29140, 3, 4294901760, 29140, 3, 4294901760, 29140, 3, 4294901760, 29144, 3, 4294901760, 29144, 3, 4294901760, 29144, 3, 4294901760, 29144, 3, 4294901760, 29144, 3, 4294901760, 29144, 3, 4294901760, 29144, 3, 4294901760, 29144, 3, 4294901760, 29144, 3, 4294901760, 29144, 3, 4294901760, 29144, 3, 4294901760, 29144, 3, 4294901760, 29144, 3, 4294901760, 29144, 3, 4294901760, 29144, 3, 4294901760, 29144, 3, 4294901760, 29144, 3, 4294901760, 29144, 3, 4294901760, 29148, 3, 4294901760, 29148, 3, 4294901760, 29148, 3, 4294901760, 29148, 3, 4294901760, 29148, 3, 4294901760, 29148, 3, 4294901760, 29148, 3, 4294901760, 29148, 3, 4294901760, 29148, 3, 4294901760, 29148, 3, 4294901760, 29148, 3, 4294901760, 29148, 3, 4294901760, 29148, 3, 4294901760, 29148, 3, 4294901760, 29148, 3, 4294901760, 29148, 3, 4294901760, 29148, 3, 4294901760, 29148, 3, 4294901760, 29156, 3, 4294901760, 29156, 3, 4294901760, 29156, 3, 4294901760, 29156, 3, 4294901760, 29156, 3, 4294901760, 29156, 3, 4294901760, 29156, 3, 4294901760, 29156, 3, 4294901760, 29156, 3, 4294901760, 29156, 3, 4294901760, 29156, 3, 4294901760, 29156, 3, 4294901760, 29156, 3, 4294901760, 29156, 3, 4294901760, 29156, 3, 4294901760, 29156, 3, 4294901760, 29156, 3, 4294901760, 29156, 3, 4294901760, 29160, 3, 4294901760, 29160, 3, 4294901760, 29160, 3, 4294901760, 29160, 3, 4294901760, 29160, 3, 4294901760, 29160, 3, 4294901760, 29160, 3, 4294901760, 29160, 3, 4294901760, 29160, 3, 4294901760, 29160, 3, 4294901760, 29160, 3, 4294901760, 29160, 3, 4294901760, 29160, 3, 4294901760, 29160, 3, 4294901760, 29160, 3, 4294901760, 29160, 3, 4294901760, 29160, 3, 4294901760, 29160, 3, 4294901760, 29164, 3, 4294901760, 29164, 3, 4294901760, 29164, 3, 4294901760, 29164, 3, 4294901760, 29164, 3, 4294901760, 29164, 3, 
4294901760, 29164, 3, 4294901760, 29164, 3, 4294901760, 29164, 3, 4294901760, 29164, 3, 4294901760, 29164, 3, 4294901760, 29164, 3, 4294901760, 29164, 3, 4294901760, 29164, 3, 4294901760, 29164, 3, 4294901760, 29164, 3, 4294901760, 29164, 3, 4294901760, 29164, 3, 4294901760, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29716, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 
1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29720, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29724, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 
29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29732, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29736, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 
1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765, 29740, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756440722652165099_347_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756440722652165099_347_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f8543c3f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756440722652165099_347_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,441 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((80 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((91 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 32))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((163 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 48)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((273 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((282 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 49)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (292 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (304 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() >= 63)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((337 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((374 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((389 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (396 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 17, 0, 1792, 17, 0, 6336, 537002016, 2097664, 6336, 537002016, 2097664, 6336, 537002016, 2097664, 6336, 537002016, 2097664, 6336, 537002016, 2097664, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 
6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 6656, 1145324612, 1145324612, 12544, 286331153, 286331153, 12544, 286331153, 286331153, 12544, 286331153, 286331153, 12544, 286331153, 286331153, 12544, 286331153, 286331153, 12544, 286331153, 286331153, 12544, 286331153, 286331153, 12544, 286331153, 286331153, 12544, 286331153, 286331153, 12544, 286331153, 286331153, 12544, 286331153, 286331153, 12544, 286331153, 286331153, 12544, 286331153, 286331153, 12544, 286331153, 286331153, 12544, 286331153, 286331153, 12544, 286331153, 286331153, 13632, 0, 286326784, 13632, 0, 286326784, 13632, 0, 286326784, 13632, 0, 286326784, 13648, 0, 286326784, 13648, 0, 286326784, 13648, 0, 286326784, 13648, 0, 286326784, 13664, 0, 286326784, 13664, 0, 286326784, 13664, 0, 286326784, 13664, 0, 286326784, 14080, 273, 0, 14080, 273, 0, 14080, 273, 0, 14096, 273, 0, 14096, 273, 0, 14096, 273, 0, 14112, 273, 0, 14112, 273, 0, 14112, 273, 0, 19136, 0, 536870912, 19456, 537002016, 2097664, 19456, 537002016, 2097664, 19456, 537002016, 2097664, 19456, 537002016, 2097664, 19456, 537002016, 2097664, 22464, 0, 1145307136, 22464, 0, 1145307136, 22464, 0, 1145307136, 22464, 0, 1145307136, 23936, 0, 64, 23952, 0, 64, 23968, 0, 64, 25344, 559240, 0, 25344, 559240, 0, 25344, 559240, 0, 25344, 559240, 0, 25344, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 
+... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756441034168454401_350_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756441034168454401_350_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e2116112 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756441034168454401_350_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,119 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 60))) { + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((68 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 57)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((75 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 594 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 
1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 992, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 
1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 1008, 1431655765, 1431655765, 4368, 262144, 0, 4372, 262144, 0, 4384, 262144, 0, 4388, 262144, 0, 4400, 262144, 0, 4404, 262144, 0, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6480, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 
1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6496, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765, 6512, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756441042257186915_351_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756441042257186915_351_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c27410c6 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756441042257186915_351_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,279 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 38)) { + if ((WaveGetLaneIndex() >= 50)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 56)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 21))) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 25)) { + if ((WaveGetLaneIndex() >= 59)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 51))) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() >= 33)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 27)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 46)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 207 
+ - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1664, 0, 2181562368, 1664, 0, 2181562368, 1664, 0, 2181562368, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 4992, 2863311530, 2863311530, 11520, 85, 0, 11520, 85, 0, 11520, 85, 0, 11520, 85, 0, 13072, 42, 0, 13072, 42, 0, 13072, 42, 0, 13088, 42, 0, 13088, 42, 0, 13088, 42, 0, 13104, 42, 0, 13104, 42, 0, 13104, 42, 0, 13504, 42, 0, 13504, 42, 0, 13504, 42, 0, 13952, 0, 2863300608, 13952, 0, 2863300608, 13952, 0, 2863300608, 13952, 0, 2863300608, 13952, 0, 2863300608, 13952, 0, 2863300608, 13952, 0, 2863300608, 13952, 0, 2863300608, 13952, 0, 2863300608, 15488, 545392640, 136348168, 15488, 545392640, 136348168, 15488, 545392640, 136348168, 15488, 545392640, 136348168, 15488, 545392640, 136348168, 15488, 545392640, 136348168, 15488, 545392640, 136348168, 15488, 545392640, 136348168, 15488, 545392640, 136348168] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756441058722259944_353_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756441058722259944_353_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3d3b4bad --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756441058722259944_353_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,118 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((24 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((33 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1674 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1540, 73, 0, 1540, 73, 0, 1540, 73, 0, 1544, 73, 0, 1544, 73, 0, 1544, 73, 0, 1556, 73, 0, 1556, 73, 0, 1556, 73, 0, 1560, 73, 0, 1560, 73, 0, 1560, 73, 0, 1572, 73, 0, 1572, 73, 0, 1572, 73, 0, 1576, 73, 0, 1576, 73, 0, 1576, 73, 0, 2116, 1363481681, 340870420, 2116, 1363481681, 340870420, 2116, 1363481681, 340870420, 2116, 1363481681, 340870420, 2116, 1363481681, 340870420, 2116, 1363481681, 340870420, 2116, 1363481681, 340870420, 2116, 1363481681, 340870420, 2116, 1363481681, 340870420, 2116, 1363481681, 340870420, 2116, 1363481681, 340870420, 2116, 1363481681, 340870420, 2116, 1363481681, 
340870420, 2116, 1363481681, 340870420, 2116, 1363481681, 340870420, 2116, 1363481681, 340870420, 2116, 1363481681, 340870420, 2116, 1363481681, 340870420, 2116, 1363481681, 340870420, 2116, 1363481681, 340870420, 2116, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2120, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2132, 1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 
1363481681, 340870420, 2136, 1363481681, 340870420, 2136, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2148, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2152, 1363481681, 340870420, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 
4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2436, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 
4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2440, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 
4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2452, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 
4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2456, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 
4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2468, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 
4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 2472, 4294967295, 4294967295, 4164, 2621696, 134217730, 4164, 2621696, 134217730, 4164, 2621696, 134217730, 4164, 2621696, 134217730, 4164, 2621696, 134217730, 4168, 2621696, 134217730, 4168, 2621696, 134217730, 4168, 2621696, 134217730, 4168, 2621696, 134217730, 4168, 2621696, 134217730, 4180, 2621696, 134217730, 4180, 2621696, 134217730, 4180, 2621696, 134217730, 4180, 2621696, 134217730, 4180, 2621696, 134217730, 4184, 2621696, 134217730, 4184, 2621696, 134217730, 4184, 2621696, 134217730, 4184, 2621696, 134217730, 4184, 2621696, 134217730, 4196, 2621696, 134217730, 4196, 2621696, 134217730, 4196, 2621696, 134217730, 4196, 2621696, 134217730, 4196, 2621696, 134217730, 4200, 2621696, 134217730, 4200, 2621696, 134217730, 4200, 2621696, 134217730, 4200, 2621696, 134217730, 4200, 2621696, 134217730] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + 
Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756441090327426323_354_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756441090327426323_354_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..33eb191e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756441090327426323_354_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,185 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 52))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 63))) { + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 59)) || 
(WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 62))) { + if (((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 2, 2863310848, 1792, 2, 2863310848, 1792, 2, 2863310848, 1792, 2, 2863310848, 1792, 2, 2863310848, 1792, 2, 2863310848, 1792, 2, 2863310848, 1792, 2, 2863310848, 1792, 2, 2863310848, 1792, 2, 2863310848, 1792, 2, 2863310848, 1792, 2, 2863310848, 1808, 2, 2863310848, 1808, 2, 2863310848, 1808, 2, 2863310848, 1808, 2, 2863310848, 1808, 2, 2863310848, 1808, 2, 2863310848, 1808, 2, 2863310848, 1808, 2, 2863310848, 1808, 2, 2863310848, 1808, 2, 2863310848, 1808, 2, 2863310848, 1808, 2, 2863310848, 1824, 2, 2863310848, 1824, 2, 2863310848, 1824, 2, 2863310848, 1824, 2, 2863310848, 1824, 2, 2863310848, 1824, 2, 2863310848, 1824, 2, 2863310848, 1824, 2, 2863310848, 1824, 2, 2863310848, 1824, 2, 2863310848, 1824, 2, 2863310848, 1824, 2, 2863310848, 4672, 170, 2852126720, 4672, 170, 2852126720, 4672, 170, 2852126720, 4672, 170, 2852126720, 4672, 170, 2852126720, 4672, 170, 2852126720, 4672, 170, 2852126720, 4672, 170, 2852126720, 4688, 170, 2852126720, 4688, 170, 2852126720, 4688, 170, 2852126720, 4688, 170, 2852126720, 4688, 170, 2852126720, 4688, 170, 2852126720, 4688, 170, 2852126720, 4688, 170, 2852126720, 4704, 170, 2852126720, 4704, 170, 2852126720, 4704, 170, 2852126720, 4704, 170, 2852126720, 4704, 170, 2852126720, 4704, 170, 2852126720, 4704, 170, 2852126720, 4704, 170, 2852126720, 6096, 402653184, 2147483648, 6096, 402653184, 2147483648, 6096, 402653184, 2147483648, 6112, 402653184, 2147483648, 6112, 402653184, 2147483648, 6112, 402653184, 2147483648, 12560, 0, 2147483648, 12576, 0, 2147483648, 14032, 1082130496, 536870944, 14032, 1082130496, 536870944, 14032, 1082130496, 536870944, 14032, 1082130496, 536870944, 14032, 1082130496, 536870944, 14048, 1082130496, 536870944, 14048, 1082130496, 536870944, 14048, 1082130496, 536870944, 14048, 1082130496, 536870944, 14048, 
1082130496, 536870944] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756441101008720339_355_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756441101008720339_355_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e06abc4d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756441101008720339_355_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,182 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + 
if ((WaveGetLaneIndex() >= 50)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 60))) { + result = (result + 
WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 17)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756441124016874857_357_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756441124016874857_357_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..89c160e2 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756441124016874857_357_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,168 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 58)) { + if ((WaveGetLaneIndex() >= 32)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 
1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((68 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() < 23)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + 
if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 129 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 0, 2818572288, 1216, 0, 2818572288, 1216, 0, 2818572288, 5200, 0, 2684354560, 5200, 0, 2684354560, 6464, 73, 0, 6464, 73, 0, 6464, 73, 0, 7040, 272696336, 68174084, 7040, 272696336, 68174084, 7040, 272696336, 68174084, 7040, 272696336, 68174084, 7040, 272696336, 68174084, 7040, 272696336, 68174084, 7040, 272696336, 68174084, 7040, 272696336, 68174084, 7040, 272696336, 68174084, 7040, 272696336, 68174084, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513] + - 
Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756441124636728010_358_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756441124636728010_358_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1aff50c1 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756441124636728010_358_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,165 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 35)) { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2304, 537002016, 2097664, 2304, 537002016, 2097664, 2304, 537002016, 2097664, 2304, 537002016, 2097664, 2304, 537002016, 2097664, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 2624, 1717986918, 1717986918, 3264, 72, 0, 3264, 72, 0, 3840, 4195328, 67125252, 3840, 4195328, 67125252, 3840, 4195328, 67125252, 3840, 4195328, 67125252, 3840, 4195328, 67125252, 4160, 612517924, 1210352200, 4160, 612517924, 1210352200, 4160, 612517924, 1210352200, 4160, 612517924, 1210352200, 4160, 612517924, 1210352200, 4160, 612517924, 1210352200, 4160, 612517924, 1210352200, 4160, 612517924, 1210352200, 4160, 612517924, 1210352200, 4160, 612517924, 1210352200, 4160, 612517924, 1210352200, 4160, 612517924, 1210352200, 4160, 612517924, 1210352200, 4160, 
612517924, 1210352200, 4160, 612517924, 1210352200, 4160, 612517924, 1210352200] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756441219815481818_361_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756441219815481818_361_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c067a528 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756441219815481818_361_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,158 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 48))) { + if ((WaveGetLaneIndex() < 24)) { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 51))) { + if (((WaveGetLaneIndex() == 29) || 
(WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 29)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 34)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 43)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 22)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 1, 0, 3584, 1, 1430257664, 3584, 1, 1430257664, 3584, 1, 1430257664, 3584, 1, 1430257664, 3584, 1, 1430257664, 3584, 1, 1430257664] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize64BitTracking/tests/program_1756441220052016432_362_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756441220052016432_362_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..974fbd83
--- /dev/null
+++ b/test/WaveSize64BitTracking/tests/program_1756441220052016432_362_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,201 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // record log: 3 uints per record (id word, ballot.x, ballot.y)
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 holds the next free word index in _participant_bit
+
+[numthreads(64, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // NOTE(review): expected data presumes all 64 threads share one wave - confirm on target
+  uint result = 0;
+  if (((WaveGetLaneIndex() & 1) == 0)) { // even lanes only
+    if (((WaveGetLaneIndex() & 1) == 1)) { // contradicts the even-lane guard: op 15 never logs
+      result = (result + WaveActiveMax((WaveGetLaneIndex() + 1)));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // every participating lane reserves its own 3-word record
+      _participant_bit[temp] = (15 << 6); // id word: op id stored from bit 6 upwards
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x; // active-lane mask, lanes 0-31
+      _participant_bit[(temp + 2)] = ballot.y; // active-lane mask, lanes 32-63
+    }
+    switch ((WaveGetLaneIndex() % 4)) { // even lanes can only take case 0 or case 2
+      case 0: {
+        if ((WaveGetLaneIndex() < 8)) { // lanes 0 and 4 -> two records (1600, 17, 0)
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (25 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 1: { // not taken by any even lane
+        if ((WaveGetLaneIndex() < 2)) {
+          if ((WaveGetLaneIndex() < 21)) {
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (35 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        } else {
+          if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 21))) {
+            result = (result + WaveActiveSum(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (50 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 55))) {
+            result = (result + WaveActiveSum(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (69 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        break;
+      }
+      case 2: {
+        uint counter0 = 0;
+        while ((counter0 < 3)) {
+          counter0 = (counter0 + 1);
+          if (((WaveGetLaneIndex() & 1) == 1)) { // odd-lane test inside the even-lane guard: never logs
+            result = (result + WaveActiveSum(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((85 << 6) | (counter0 << 4)); // loop counter folded into bits 4-5 of the id word
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        break;
+      }
+      case 3: { // not taken by any even lane
+        if ((WaveGetLaneIndex() < 20)) {
+          result = (result + WaveActiveSum(4));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (92 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+    }
+    if (((WaveGetLaneIndex() & 1) == 1)) { // odd-lane test inside the even-lane guard: never logs
+      result = (result + WaveActiveMax(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (101 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+  if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 23))) {
+    if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 24))) { // only lane 26 is in both sets -> one record (8896, 1 << 26, 0)
+      result = (result + WaveActiveMin(9));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (139 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+  switch ((WaveGetLaneIndex() % 3)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) { // lanes 0, 3, 6 -> three records (9536, 73, 0)
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (149 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) { // lanes 4, 10, ..., 58 -> ten records with id word 10112
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (158 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 2: {
+      if (true) { // lanes 2, 5, ..., 62 -> twenty-one records with id word 10432
+        result = (result + WaveActiveSum(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (163 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 64 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 111 # 37 records x 3 words; matches the 111 values in expected_bit_patterns
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1600, 17, 0, 1600, 17, 0, 8896, 67108864, 0, 9536, 73, 0, 9536, 73, 0, 9536, 73, 0, 10112, 272696336, 68174084, 10112, 272696336, 68174084, 10112, 272696336, 68174084, 10112, 272696336, 68174084, 10112, 272696336, 68174084, 10112, 272696336, 68174084, 10112, 272696336, 68174084, 10112, 272696336, 68174084, 10112, 272696336, 68174084, 10112, 272696336, 68174084, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513, 10432, 613566756, 1227133513]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0] # write cursor starts at word 0
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3 # one record = id word + two ballot words
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize64BitTracking/tests/program_1756441222311902386_363_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756441222311902386_363_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..5198233a
--- /dev/null
+++ b/test/WaveSize64BitTracking/tests/program_1756441222311902386_363_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,105 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // record log: 3 uints per record (id word, ballot.x, ballot.y)
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 holds the next free word index in _participant_bit
+
+[numthreads(64, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // no lane reaches any wave op below, so the log stays all zero
+  uint result = 0;
+  if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 42))) { // only lanes 10, 29 and 42 enter
+    if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 43))) { // disjoint from {10, 29, 42}: never true
+      result = (result + WaveActiveMin(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // every participating lane would reserve its own 3-word record
+      _participant_bit[temp] = (25 << 6); // id word: op id stored from bit 6 upwards
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x; // active-lane mask, lanes 0-31
+      _participant_bit[(temp + 2)] = ballot.y; // active-lane mask, lanes 32-63
+    }
+    switch ((WaveGetLaneIndex() % 2)) {
+      case 0: {
+        if ((WaveGetLaneIndex() < 8)) { // lanes 10 and 42 arrive here but neither is < 8
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (35 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 1: {
+        if (((WaveGetLaneIndex() % 2) == 0)) { // lane 29 arrives here but is odd
+          result = (result + WaveActiveSum(2));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (44 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      default: { // unreachable: a value % 2 is always 0 or 1
+        result = (result + WaveActiveSum(99));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (48 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+        break;
+      }
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 64 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 3 # one all-zero record; must equal expected_bit_patterns' Size because the rule rejects buffers whose sizes differ
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 3 # the shader logs nothing, so a single zero-filled record is expected
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0] # write cursor starts at word 0
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3 # one record = id word + two ballot words
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756441222478057542_364_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756441222478057542_364_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3c7bccee --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756441222478057542_364_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,455 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 33)) { + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((97 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((120 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 
12) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((139 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 43))) { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 37)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + 
if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((230 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((241 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 31)) { + if ((WaveGetLaneIndex() >= 39)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() 
& 1) == 0)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((303 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (312 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if ((WaveGetLaneIndex() == 38)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((333 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 42))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMax(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((351 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((362 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 3: { + uint counter8 = 0; + while ((counter8 < 2)) { + counter8 = (counter8 + 1); + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((380 << 6) | (counter8 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i9 = 0; (i9 < 2); i9 = (i9 + 1)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((395 << 6) | (counter8 << 4)) | (i9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((405 << 6) | (counter8 << 4)) | (i9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((414 << 6) | (counter8 << 4)) | (i9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((419 << 6) | (counter8 << 4)) | (i9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((426 << 6) | (counter8 << 4)) | (i9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 47)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((433 << 6) | (counter8 << 4)) | (i9 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i9 == 1)) { + continue; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (446 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (455 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 603 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1408, 16, 1073741832, 1408, 16, 1073741832, 1408, 16, 1073741832, 1424, 16, 1073741832, 1424, 16, 1073741832, 1424, 16, 1073741832, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3520, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 
1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 3536, 1431655765, 1431655765, 4800, 286331153, 286331153, 4800, 286331153, 286331153, 4800, 286331153, 286331153, 4800, 286331153, 286331153, 4800, 286331153, 286331153, 4800, 286331153, 286331153, 4800, 286331153, 286331153, 4800, 286331153, 286331153, 4800, 286331153, 286331153, 4800, 286331153, 286331153, 4800, 286331153, 286331153, 4800, 286331153, 286331153, 4800, 286331153, 286331153, 4800, 286331153, 286331153, 4800, 286331153, 286331153, 4800, 286331153, 286331153, 4816, 286331153, 286331153, 4816, 286331153, 286331153, 4816, 286331153, 286331153, 4816, 286331153, 286331153, 4816, 286331153, 286331153, 4816, 286331153, 286331153, 4816, 286331153, 286331153, 4816, 286331153, 286331153, 4816, 286331153, 286331153, 4816, 286331153, 286331153, 4816, 286331153, 286331153, 4816, 286331153, 286331153, 4816, 286331153, 286331153, 4816, 286331153, 286331153, 4816, 286331153, 286331153, 4816, 286331153, 286331153, 4832, 286331153, 286331153, 4832, 286331153, 286331153, 4832, 286331153, 286331153, 4832, 286331153, 286331153, 4832, 286331153, 286331153, 4832, 286331153, 286331153, 4832, 286331153, 286331153, 4832, 286331153, 286331153, 4832, 286331153, 286331153, 4832, 286331153, 286331153, 4832, 286331153, 286331153, 4832, 286331153, 286331153, 4832, 286331153, 286331153, 4832, 286331153, 286331153, 4832, 286331153, 286331153, 4832, 286331153, 286331153, 8900, 4096, 0, 8904, 4096, 0, 8916, 4096, 0, 8920, 4096, 0, 8932, 4096, 0, 8936, 4096, 0, 11776, 8738, 536870912, 11776, 8738, 536870912, 11776, 8738, 536870912, 11776, 8738, 536870912, 11776, 8738, 536870912, 12352, 
572522496, 546, 12352, 572522496, 546, 12352, 572522496, 546, 12352, 572522496, 546, 12352, 572522496, 546, 12352, 572522496, 546, 13392, 572522496, 546, 13392, 572522496, 546, 13392, 572522496, 546, 13392, 572522496, 546, 13392, 572522496, 546, 13392, 572522496, 546, 13408, 572522496, 546, 13408, 572522496, 546, 13408, 572522496, 546, 13408, 572522496, 546, 13408, 572522496, 546, 13408, 572522496, 546, 16320, 572522496, 546, 16320, 572522496, 546, 16320, 572522496, 546, 16320, 572522496, 546, 16320, 572522496, 546, 16320, 572522496, 546, 21328, 0, 64, 21344, 0, 64, 22480, 0, 1145307136, 22480, 0, 1145307136, 22480, 0, 1145307136, 22480, 0, 1145307136, 22496, 0, 1145307136, 22496, 0, 1145307136, 22496, 0, 1145307136, 22496, 0, 1145307136, 23184, 0, 1073741824, 23200, 0, 1073741824, 24336, 2184, 2147483648, 24336, 2184, 2147483648, 24336, 2184, 2147483648, 24336, 2184, 2147483648, 24352, 2184, 2147483648, 24352, 2184, 2147483648, 24352, 2184, 2147483648, 24352, 2184, 2147483648, 27280, 559240, 0, 27280, 559240, 0, 27280, 559240, 0, 27280, 559240, 0, 27280, 559240, 0, 27284, 559240, 0, 27284, 559240, 0, 27284, 559240, 0, 27284, 559240, 0, 27284, 559240, 0, 27296, 559240, 0, 27296, 559240, 0, 27296, 559240, 0, 27296, 559240, 0, 27296, 559240, 0, 27300, 559240, 0, 27300, 559240, 0, 27300, 559240, 0, 27300, 559240, 0, 27300, 559240, 0, 27728, 0, 32768, 27732, 0, 32768, 27744, 0, 32768, 27748, 0, 32768, 28544, 85, 0, 28544, 85, 0, 28544, 85, 0, 28544, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756441361046060192_365_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756441361046060192_365_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e7b72528 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756441361046060192_365_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,299 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 20)) { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 61))) { + if (((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 52)) { + if ((WaveGetLaneIndex() == 59)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 63)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((136 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 30)) { + if ((WaveGetLaneIndex() == 52)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((148 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((155 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((164 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 
2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 55))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 58))) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 345 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5952, 4, 0, 6592, 73, 0, 6592, 73, 0, 6592, 73, 0, 7552, 16, 0, 7568, 16, 0, 8708, 2181570690, 545392672, 8708, 2181570690, 545392672, 8708, 2181570690, 545392672, 8708, 2181570690, 545392672, 8708, 2181570690, 545392672, 8708, 2181570690, 545392672, 8708, 2181570690, 545392672, 8708, 2181570690, 545392672, 8708, 2181570690, 545392672, 8708, 2181570690, 545392672, 8708, 2181570690, 545392672, 8712, 2181570690, 545392672, 8712, 2181570690, 545392672, 8712, 2181570690, 545392672, 8712, 2181570690, 545392672, 8712, 2181570690, 545392672, 8712, 2181570690, 545392672, 8712, 2181570690, 545392672, 8712, 2181570690, 545392672, 8712, 2181570690, 545392672, 8712, 2181570690, 545392672, 8712, 2181570690, 545392672, 8724, 2181570690, 545392672, 8724, 2181570690, 545392672, 8724, 2181570690, 545392672, 8724, 2181570690, 545392672, 8724, 2181570690, 545392672, 8724, 2181570690, 545392672, 8724, 2181570690, 545392672, 8724, 2181570690, 545392672, 8724, 2181570690, 545392672, 8724, 2181570690, 545392672, 8724, 2181570690, 545392672, 8728, 2181570690, 545392672, 8728, 2181570690, 545392672, 8728, 2181570690, 545392672, 8728, 2181570690, 545392672, 8728, 2181570690, 545392672, 8728, 
2181570690, 545392672, 8728, 2181570690, 545392672, 8728, 2181570690, 545392672, 8728, 2181570690, 545392672, 8728, 2181570690, 545392672, 8728, 2181570690, 545392672, 9924, 146, 0, 9924, 146, 0, 9924, 146, 0, 9928, 146, 0, 9928, 146, 0, 9928, 146, 0, 9940, 146, 0, 9940, 146, 0, 9940, 146, 0, 9944, 146, 0, 9944, 146, 0, 9944, 146, 0, 10500, 2181570690, 545392672, 10500, 2181570690, 545392672, 10500, 2181570690, 545392672, 10500, 2181570690, 545392672, 10500, 2181570690, 545392672, 10500, 2181570690, 545392672, 10500, 2181570690, 545392672, 10500, 2181570690, 545392672, 10500, 2181570690, 545392672, 10500, 2181570690, 545392672, 10500, 2181570690, 545392672, 10504, 2181570690, 545392672, 10504, 2181570690, 545392672, 10504, 2181570690, 545392672, 10504, 2181570690, 545392672, 10504, 2181570690, 545392672, 10504, 2181570690, 545392672, 10504, 2181570690, 545392672, 10504, 2181570690, 545392672, 10504, 2181570690, 545392672, 10504, 2181570690, 545392672, 10504, 2181570690, 545392672, 10516, 2181570690, 545392672, 10516, 2181570690, 545392672, 10516, 2181570690, 545392672, 10516, 2181570690, 545392672, 10516, 2181570690, 545392672, 10516, 2181570690, 545392672, 10516, 2181570690, 545392672, 10516, 2181570690, 545392672, 10516, 2181570690, 545392672, 10516, 2181570690, 545392672, 10516, 2181570690, 545392672, 10520, 2181570690, 545392672, 10520, 2181570690, 545392672, 10520, 2181570690, 545392672, 10520, 2181570690, 545392672, 10520, 2181570690, 545392672, 10520, 2181570690, 545392672, 10520, 2181570690, 545392672, 10520, 2181570690, 545392672, 10520, 2181570690, 545392672, 10520, 2181570690, 545392672, 10520, 2181570690, 545392672, 15552, 36, 1224736768, 15552, 36, 1224736768, 15552, 36, 1224736768, 15552, 36, 1224736768, 15552, 36, 1224736768, 16832, 36, 1207959552, 16832, 36, 1207959552, 16832, 36, 1207959552, 16832, 36, 1207959552] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756441369933056682_366_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756441369933056682_366_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8dd85bfa --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756441369933056682_366_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,165 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 3)) { + if ((WaveGetLaneIndex() >= 36)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 
24)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 50))) { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((81 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((90 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((counter0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 123 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1744, 7, 0, 1744, 7, 0, 1744, 7, 0, 1760, 7, 0, 1760, 7, 0, 1760, 7, 0, 6528, 7, 0, 6528, 7, 0, 6528, 7, 0, 7168, 72, 0, 7168, 72, 0, 7744, 272696336, 68174084, 7744, 272696336, 68174084, 7744, 272696336, 68174084, 7744, 272696336, 68174084, 7744, 272696336, 68174084, 7744, 272696336, 68174084, 7744, 272696336, 68174084, 7744, 272696336, 68174084, 7744, 272696336, 68174084, 7744, 272696336, 68174084, 8064, 613566752, 1227133513, 8064, 613566752, 1227133513, 8064, 613566752, 1227133513, 8064, 613566752, 1227133513, 8064, 613566752, 1227133513, 8064, 613566752, 1227133513, 8064, 613566752, 1227133513, 8064, 613566752, 1227133513, 8064, 613566752, 1227133513, 8064, 613566752, 1227133513, 8064, 613566752, 1227133513, 8064, 613566752, 1227133513, 8064, 613566752, 1227133513, 8064, 613566752, 1227133513, 8064, 613566752, 1227133513, 8064, 613566752, 1227133513, 8064, 613566752, 1227133513, 8064, 613566752, 1227133513, 8064, 613566752, 1227133513, 
8064, 613566752, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756441404324003110_369_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756441404324003110_369_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..513e4596 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756441404324003110_369_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,288 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 60))) { + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 47))) { + result = (result + 
WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 57)) { + if ((WaveGetLaneIndex() >= 44)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 49)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 57))) { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 17)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((244 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 117 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1936, 4369, 268435456, 1936, 4369, 268435456, 1936, 4369, 268435456, 1936, 4369, 268435456, 1936, 4369, 268435456, 1952, 4369, 268435456, 1952, 4369, 268435456, 1952, 4369, 268435456, 1952, 4369, 268435456, 1952, 4369, 268435456, 2640, 1, 268435456, 2640, 1, 268435456, 2656, 1, 268435456, 2656, 1, 268435456, 3984, 0, 16, 4000, 0, 16, 5568, 1145324612, 1145324612, 5568, 1145324612, 1145324612, 5568, 1145324612, 1145324612, 5568, 1145324612, 1145324612, 5568, 1145324612, 1145324612, 5568, 1145324612, 1145324612, 5568, 1145324612, 1145324612, 5568, 1145324612, 1145324612, 5568, 1145324612, 1145324612, 5568, 1145324612, 1145324612, 5568, 1145324612, 1145324612, 5568, 1145324612, 1145324612, 5568, 1145324612, 1145324612, 5568, 1145324612, 1145324612, 5568, 1145324612, 1145324612, 5568, 1145324612, 1145324612, 6656, 8, 0, 8640, 0, 536870912, 9088, 0, 536870912, 15616, 2080, 0, 15616, 2080, 0, 15632, 2080, 0, 15632, 2080, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + 
GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756441412209119710_370_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756441412209119710_370_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8850a20a --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756441412209119710_370_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,152 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 31))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 42))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 35)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 54))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 # Same length as expected_bit_patterns; no lane reaches a wave op in this shader + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756441512403583849_372_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756441512403583849_372_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..458f1ac4 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756441512403583849_372_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,122 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of (op tag, ballot.x, ballot.y) triples */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] = next free slot in _participant_bit */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756441877726491484_374_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756441877726491484_374_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5fc26140 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756441877726491484_374_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,464 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of (op tag, ballot.x, ballot.y) triples */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] = next free slot in _participant_bit */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 54))) { + result = 
(result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 52))) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 41))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 57))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 48))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((231 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (((273 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((283 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((292 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((297 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((301 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((308 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if 
((WaveGetLaneIndex() == 48)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((346 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 55)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((356 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 59)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((363 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((370 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 2: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = 
(counter5 + 1); + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((392 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 36))) { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((414 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((430 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((443 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (450 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (454 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 621 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3648, 4095, 4294959104, 3392, 262144, 1, 3392, 262144, 1, 2752, 67141632, 0, 2752, 67141632, 0, 4288, 73, 0, 4288, 73, 0, 4288, 73, 0, 15104, 613566756, 1227133513, 15104, 613566756, 1227133513, 15104, 613566756, 1227133513, 15104, 613566756, 1227133513, 15104, 613566756, 1227133513, 15104, 613566756, 1227133513, 15104, 613566756, 1227133513, 15104, 613566756, 1227133513, 15104, 613566756, 1227133513, 15104, 613566756, 1227133513, 15104, 613566756, 1227133513, 15104, 613566756, 1227133513, 15104, 613566756, 1227133513, 15104, 613566756, 1227133513, 15104, 613566756, 1227133513, 15104, 613566756, 1227133513, 15104, 613566756, 
1227133513, 15104, 613566756, 1227133513, 15104, 613566756, 1227133513, 15104, 613566756, 1227133513, 15104, 613566756, 1227133513, 17472, 1, 286261248, 17472, 1, 286261248, 17472, 1, 286261248, 17472, 1, 286261248, 17476, 1, 286261248, 17476, 1, 286261248, 17476, 1, 286261248, 17476, 1, 286261248, 17480, 1, 286261248, 17480, 1, 286261248, 17480, 1, 286261248, 17480, 1, 286261248, 17488, 1, 286261248, 17488, 1, 286261248, 17488, 1, 286261248, 17488, 1, 286261248, 17492, 1, 286261248, 17492, 1, 286261248, 17492, 1, 286261248, 17492, 1, 286261248, 17496, 1, 286261248, 17496, 1, 286261248, 17496, 1, 286261248, 17496, 1, 286261248, 17504, 1, 286261248, 17504, 1, 286261248, 17504, 1, 286261248, 17504, 1, 286261248, 17508, 1, 286261248, 17508, 1, 286261248, 17508, 1, 286261248, 17508, 1, 286261248, 17512, 1, 286261248, 17512, 1, 286261248, 17512, 1, 286261248, 17512, 1, 286261248, 18112, 1, 0, 18116, 1, 0, 18120, 1, 0, 18128, 1, 0, 18132, 1, 0, 18136, 1, 0, 18144, 1, 0, 18148, 1, 0, 18152, 1, 0, 18688, 268501008, 1048832, 18688, 268501008, 1048832, 18688, 268501008, 1048832, 18688, 268501008, 1048832, 18688, 268501008, 1048832, 18692, 268501008, 1048832, 18692, 268501008, 1048832, 18692, 268501008, 1048832, 18692, 268501008, 1048832, 18692, 268501008, 1048832, 18696, 268501008, 1048832, 18696, 268501008, 1048832, 18696, 268501008, 1048832, 18696, 268501008, 1048832, 18696, 268501008, 1048832, 18704, 268501008, 1048832, 18704, 268501008, 1048832, 18704, 268501008, 1048832, 18704, 268501008, 1048832, 18704, 268501008, 1048832, 18708, 268501008, 1048832, 18708, 268501008, 1048832, 18708, 268501008, 1048832, 18708, 268501008, 1048832, 18708, 268501008, 1048832, 18712, 268501008, 1048832, 18712, 268501008, 1048832, 18712, 268501008, 1048832, 18712, 268501008, 1048832, 18712, 268501008, 1048832, 18720, 268501008, 1048832, 18720, 268501008, 1048832, 18720, 268501008, 1048832, 18720, 268501008, 1048832, 18720, 268501008, 1048832, 18724, 268501008, 1048832, 18724, 268501008, 
1048832, 18724, 268501008, 1048832, 18724, 268501008, 1048832, 18724, 268501008, 1048832, 18728, 268501008, 1048832, 18728, 268501008, 1048832, 18728, 268501008, 1048832, 18728, 268501008, 1048832, 18728, 268501008, 1048832, 19008, 1048832, 16781313, 19008, 1048832, 16781313, 19008, 1048832, 16781313, 19008, 1048832, 16781313, 19008, 1048832, 16781313, 19012, 1048832, 16781313, 19012, 1048832, 16781313, 19012, 1048832, 16781313, 19012, 1048832, 16781313, 19012, 1048832, 16781313, 19016, 1048832, 16781313, 19016, 1048832, 16781313, 19016, 1048832, 16781313, 19016, 1048832, 16781313, 19016, 1048832, 16781313, 19024, 1048832, 16781313, 19024, 1048832, 16781313, 19024, 1048832, 16781313, 19024, 1048832, 16781313, 19024, 1048832, 16781313, 19028, 1048832, 16781313, 19028, 1048832, 16781313, 19028, 1048832, 16781313, 19028, 1048832, 16781313, 19028, 1048832, 16781313, 19032, 1048832, 16781313, 19032, 1048832, 16781313, 19032, 1048832, 16781313, 19032, 1048832, 16781313, 19032, 1048832, 16781313, 19040, 1048832, 16781313, 19040, 1048832, 16781313, 19040, 1048832, 16781313, 19040, 1048832, 16781313, 19040, 1048832, 16781313, 19044, 1048832, 16781313, 19044, 1048832, 16781313, 19044, 1048832, 16781313, 19044, 1048832, 16781313, 19044, 1048832, 16781313, 19048, 1048832, 16781313, 19048, 1048832, 16781313, 19048, 1048832, 16781313, 19048, 1048832, 16781313, 19048, 1048832, 16781313, 25104, 4, 0, 25120, 4, 0, 27540, 4194304, 0, 27544, 4194304, 0, 27556, 4194304, 0, 27560, 4194304, 0, 28368, 0, 4, 28384, 0, 4, 28800, 559240, 0, 28800, 559240, 0, 28800, 559240, 0, 28800, 559240, 0, 28800, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - 
Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756441951111936250_375_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756441951111936250_375_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c9a464ee --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756441951111936250_375_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,122 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of (op tag, ballot.x, ballot.y) triples */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] = next free slot in _participant_bit */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 47)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 47)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 
38)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 117 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3136, 0, 32768, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 
2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 2368, 2863311530, 2863278762, 1984, 1, 16777280, 1984, 1, 16777280, 1984, 1, 16777280, 3776, 85, 0, 3776, 85, 0, 3776, 85, 0, 3776, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756442144159751710_377_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756442144159751710_377_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f941adc0 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756442144159751710_377_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,335 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 17)) { + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() >= 63)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 34))) { + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 47)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 57)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 45))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 40)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 48))) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((226 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((237 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((247 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 81 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 1344, 268501008, 1048832, 1344, 268501008, 1048832, 1344, 268501008, 1048832, 1344, 268501008, 1048832, 1344, 268501008, 1048832, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 5632, 0, 2147483648, 16640, 85, 0, 16640, 85, 0, 16640, 85, 0, 16640, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756442147692583926_378_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756442147692583926_378_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1d34ab7b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756442147692583926_378_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,327 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 1))) { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((162 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result 
= (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 47)) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 32))) { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
(((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (286 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 339 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1744, 1170, 0, 1744, 1170, 0, 1744, 1170, 0, 1744, 1170, 0, 1760, 1170, 0, 1760, 1170, 0, 1760, 1170, 0, 1760, 1170, 0, 4496, 2181570688, 545392672, 4496, 2181570688, 545392672, 4496, 2181570688, 545392672, 4496, 2181570688, 545392672, 4496, 2181570688, 545392672, 4496, 2181570688, 545392672, 4496, 2181570688, 545392672, 4496, 2181570688, 
545392672, 4496, 2181570688, 545392672, 4496, 2181570688, 545392672, 4512, 2181570688, 545392672, 4512, 2181570688, 545392672, 4512, 2181570688, 545392672, 4512, 2181570688, 545392672, 4512, 2181570688, 545392672, 4512, 2181570688, 545392672, 4512, 2181570688, 545392672, 4512, 2181570688, 545392672, 4512, 2181570688, 545392672, 4512, 2181570688, 545392672, 5328, 18, 603979776, 5328, 18, 603979776, 5328, 18, 603979776, 5328, 18, 603979776, 5344, 18, 603979776, 5344, 18, 603979776, 5344, 18, 603979776, 5344, 18, 603979776, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 6528, 85, 0, 6528, 85, 0, 6528, 85, 0, 6528, 85, 0, 10384, 585, 0, 10384, 585, 0, 10384, 585, 0, 10384, 585, 0, 10388, 585, 0, 10388, 585, 0, 10388, 585, 0, 10388, 585, 0, 10400, 585, 0, 10400, 585, 0, 10400, 585, 0, 10400, 585, 0, 10404, 585, 0, 10404, 585, 0, 10404, 585, 0, 10404, 585, 0, 11088, 16777216, 0, 11104, 16777216, 0, 17984, 272696336, 68174084, 17984, 272696336, 68174084, 17984, 272696336, 68174084, 17984, 272696336, 68174084, 17984, 272696336, 68174084, 17984, 272696336, 68174084, 17984, 272696336, 68174084, 17984, 272696336, 68174084, 17984, 272696336, 68174084, 17984, 272696336, 68174084, 18304, 613566756, 1227133513, 18304, 613566756, 1227133513, 18304, 613566756, 1227133513, 18304, 613566756, 1227133513, 18304, 613566756, 1227133513, 18304, 613566756, 1227133513, 18304, 613566756, 1227133513, 18304, 613566756, 1227133513, 18304, 
613566756, 1227133513, 18304, 613566756, 1227133513, 18304, 613566756, 1227133513, 18304, 613566756, 1227133513, 18304, 613566756, 1227133513, 18304, 613566756, 1227133513, 18304, 613566756, 1227133513, 18304, 613566756, 1227133513, 18304, 613566756, 1227133513, 18304, 613566756, 1227133513, 18304, 613566756, 1227133513, 18304, 613566756, 1227133513, 18304, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756442286411841888_381_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756442286411841888_381_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d07fccae --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756442286411841888_381_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,82 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 3)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 13))) { + if ((WaveGetLaneIndex() == 56)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756442286550074281_382_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756442286550074281_382_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..92f0db0f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756442286550074281_382_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,286 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 
21)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 34))) { + if ((((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((77 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((96 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 60))) { + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((247 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 447 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2752, 18, 613564416, 2752, 18, 613564416, 2752, 18, 613564416, 2752, 18, 613564416, 2752, 18, 613564416, 2752, 18, 613564416, 2752, 18, 613564416, 2752, 18, 613564416, 2756, 18, 613564416, 2756, 18, 613564416, 2756, 18, 613564416, 2756, 18, 613564416, 2756, 18, 613564416, 2756, 18, 613564416, 2756, 18, 613564416, 2756, 18, 613564416, 2760, 18, 613564416, 2760, 18, 613564416, 2760, 18, 613564416, 2760, 18, 613564416, 2760, 18, 613564416, 2760, 18, 613564416, 2760, 18, 613564416, 2760, 18, 613564416, 2768, 18, 613564416, 2768, 18, 613564416, 2768, 18, 613564416, 2768, 18, 613564416, 2768, 18, 613564416, 2768, 18, 613564416, 2768, 18, 613564416, 2768, 18, 613564416, 2772, 18, 613564416, 2772, 18, 613564416, 2772, 18, 613564416, 2772, 18, 613564416, 2772, 18, 613564416, 2772, 18, 613564416, 2772, 18, 613564416, 2772, 18, 613564416, 2776, 18, 613564416, 2776, 18, 613564416, 2776, 18, 613564416, 2776, 18, 613564416, 2776, 18, 613564416, 2776, 18, 613564416, 2776, 18, 613564416, 2776, 18, 613564416, 2784, 18, 613564416, 2784, 18, 613564416, 2784, 18, 613564416, 2784, 18, 613564416, 2784, 18, 613564416, 2784, 18, 613564416, 2784, 18, 613564416, 2784, 18, 613564416, 2788, 18, 613564416, 2788, 18, 613564416, 2788, 18, 613564416, 2788, 18, 613564416, 2788, 18, 613564416, 2788, 18, 613564416, 2788, 18, 613564416, 2788, 18, 613564416, 2792, 18, 613564416, 2792, 18, 613564416, 2792, 18, 613564416, 2792, 18, 613564416, 2792, 18, 613564416, 2792, 18, 613564416, 2792, 18, 613564416, 2792, 18, 613564416, 6144, 0, 4, 6148, 0, 
4, 6152, 0, 4, 6160, 0, 4, 6164, 0, 4, 6168, 0, 4, 6176, 0, 4, 6180, 0, 4, 6184, 0, 4, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7744, 85, 0, 7744, 85, 0, 7744, 85, 0, 7744, 85, 0, 8960, 17, 0, 8960, 17, 0, 9856, 1145324612, 1145324612, 9856, 1145324612, 1145324612, 9856, 1145324612, 1145324612, 9856, 1145324612, 1145324612, 9856, 1145324612, 1145324612, 9856, 1145324612, 1145324612, 9856, 1145324612, 1145324612, 9856, 1145324612, 1145324612, 9856, 1145324612, 1145324612, 9856, 1145324612, 1145324612, 9856, 1145324612, 1145324612, 9856, 1145324612, 1145324612, 9856, 1145324612, 1145324612, 9856, 1145324612, 1145324612, 9856, 1145324612, 1145324612, 9856, 1145324612, 1145324612, 11520, 2184, 2147483648, 11520, 2184, 2147483648, 11520, 2184, 2147483648, 11520, 2184, 2147483648, 11536, 2184, 2147483648, 11536, 2184, 2147483648, 11536, 2184, 2147483648, 11536, 2184, 2147483648, 13440, 2184, 2147483648, 13440, 2184, 2147483648, 13440, 2184, 2147483648, 13440, 2184, 2147483648, 13456, 2184, 2147483648, 13456, 2184, 2147483648, 13456, 2184, 2147483648, 13456, 2184, 2147483648, 14400, 0, 2048, 14416, 0, 2048, 15808, 8, 2147483648, 15808, 8, 2147483648, 15824, 8, 2147483648, 15824, 8, 2147483648] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756442408265128710_383_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756442408265128710_383_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..367dad09 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756442408265128710_383_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,242 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Each guarded wave-op site appends a 3-uint record to _participant_bit: the site id shifted left by 6 (OR-ed with loop-counter bits inside loops), then ballot.x and ballot.y. InterlockedAdd on _wave_op_index[0] gives each participating lane its own 3-uint slot; the expected data lists one identical record per participating lane. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 48)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 38))) { /* Lanes 8 and 38 are not multiples of 3, so inside case 0 this branch never runs; expected_bit_patterns has no records for sites 39 or 54. */ + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 37)) 
|| (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 60)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); /* Incremented before use, so the iteration field recorded by sites 79 and 88 takes the values 1..3 rather than 0..2. */ + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); /* Lanes 36 and 16 are not in the lane % 3 == 2 group handled by this case, so sites 102 and 109 are never recorded. */ + if ((WaveGetLaneIndex() == 36)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 54)) { + if ((WaveGetLaneIndex() == 22)) { /* A lane cannot be both 54 and 22, so site 155 is never recorded. */ + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (counter3 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } /* No break here: case 1 lanes fall through into case 2. The expected records for site 176 carry ballot masks covering both lane % 3 == 1 and lane % 3 == 2, so the fall-through is part of what this test checks. */ + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 558 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 0, 2454257664, 1088, 0, 2454257664, 1088, 0, 2454257664, 1088, 0, 2454257664, 1088, 0, 2454257664, 1088, 0, 2454257664, 1104, 0, 2454257664, 1104, 0, 2454257664, 1104, 0, 2454257664, 1104, 0, 2454257664, 1104, 0, 2454257664, 1104, 0, 2454257664, 1120, 0, 2454257664, 1120, 0, 2454257664, 1120, 0, 2454257664, 1120, 0, 2454257664, 1120, 0, 2454257664, 1120, 0, 2454257664, 4032, 0, 2415919104, 4032, 0, 2415919104, 4048, 0, 2415919104, 4048, 0, 2415919104, 4064, 0, 2415919104, 4064, 0, 2415919104, 5072, 272696336, 68174084, 5072, 272696336, 68174084, 5072, 272696336, 68174084, 5072, 272696336, 68174084, 5072, 272696336, 68174084, 5072, 272696336, 68174084, 5072, 272696336, 68174084, 5072, 272696336, 68174084, 5072, 272696336, 68174084, 5072, 272696336, 68174084, 5088, 272696336, 68174084, 5088, 272696336, 68174084, 5088, 272696336, 68174084, 5088, 272696336, 68174084, 5088, 272696336, 68174084, 5088, 272696336, 68174084, 5088, 272696336, 68174084, 5088, 
272696336, 68174084, 5088, 272696336, 68174084, 5088, 272696336, 68174084, 5088, 272696336, 68174084, 5104, 272696336, 68174084, 5104, 272696336, 68174084, 5104, 272696336, 68174084, 5104, 272696336, 68174084, 5104, 272696336, 68174084, 5104, 272696336, 68174084, 5104, 272696336, 68174084, 5104, 272696336, 68174084, 5104, 272696336, 68174084, 5104, 272696336, 68174084, 5648, 272696336, 68174084, 5648, 272696336, 68174084, 5648, 272696336, 68174084, 5648, 272696336, 68174084, 5648, 272696336, 68174084, 5648, 272696336, 68174084, 5648, 272696336, 68174084, 5648, 272696336, 68174084, 5648, 272696336, 68174084, 5648, 272696336, 68174084, 5664, 272696336, 68174084, 5664, 272696336, 68174084, 5664, 272696336, 68174084, 5664, 272696336, 68174084, 5664, 272696336, 68174084, 5664, 272696336, 68174084, 5664, 272696336, 68174084, 5664, 272696336, 68174084, 5664, 272696336, 68174084, 5664, 272696336, 68174084, 5680, 272696336, 68174084, 5680, 272696336, 68174084, 5680, 272696336, 68174084, 5680, 272696336, 68174084, 5680, 272696336, 68174084, 5680, 272696336, 68174084, 5680, 272696336, 68174084, 5680, 272696336, 68174084, 5680, 272696336, 68174084, 5680, 272696336, 68174084, 7616, 73, 0, 7616, 73, 0, 7616, 73, 0, 8192, 272696336, 68174084, 8192, 272696336, 68174084, 8192, 272696336, 68174084, 8192, 272696336, 68174084, 8192, 272696336, 68174084, 8192, 272696336, 68174084, 8192, 272696336, 68174084, 8192, 272696336, 68174084, 8192, 272696336, 68174084, 8192, 272696336, 68174084, 8512, 613566756, 1227133513, 8512, 613566756, 1227133513, 8512, 613566756, 1227133513, 8512, 613566756, 1227133513, 8512, 613566756, 1227133513, 8512, 613566756, 1227133513, 8512, 613566756, 1227133513, 8512, 613566756, 1227133513, 8512, 613566756, 1227133513, 8512, 613566756, 1227133513, 8512, 613566756, 1227133513, 8512, 613566756, 1227133513, 8512, 613566756, 1227133513, 8512, 613566756, 1227133513, 8512, 613566756, 1227133513, 8512, 613566756, 1227133513, 8512, 613566756, 1227133513, 8512, 
613566756, 1227133513, 8512, 613566756, 1227133513, 8512, 613566756, 1227133513, 8512, 613566756, 1227133513, 10384, 0, 2454266880, 10384, 0, 2454266880, 10384, 0, 2454266880, 10384, 0, 2454266880, 10384, 0, 2454266880, 10384, 0, 2454266880, 10384, 0, 2454266880, 10384, 0, 2454266880, 10400, 0, 2454266880, 10400, 0, 2454266880, 10400, 0, 2454266880, 10400, 0, 2454266880, 10400, 0, 2454266880, 10400, 0, 2454266880, 10400, 0, 2454266880, 10400, 0, 2454266880, 10944, 272696336, 68174084, 10944, 272696336, 68174084, 10944, 272696336, 68174084, 10944, 272696336, 68174084, 10944, 272696336, 68174084, 10944, 272696336, 68174084, 10944, 272696336, 68174084, 10944, 272696336, 68174084, 10944, 272696336, 68174084, 10944, 272696336, 68174084, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 3067833782, 1840700269, 11264, 
3067833782, 1840700269, 11264, 3067833782, 1840700269] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756442434179207346_384_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756442434179207346_384_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6989bc51 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756442434179207346_384_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,153 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Each guarded wave-op site appends a 3-uint record to _participant_bit: the site id shifted left by 6 (OR-ed with loop-counter bits inside loops), then ballot.x and ballot.y. InterlockedAdd on _wave_op_index[0] gives each participating lane its own 3-uint slot; the expected data lists one identical record per participating lane. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { /* Nested loop: records from the sites below encode the outer counter in bits 4-5 and the inner counter in bits 2-3 of the first word. */ + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 45))) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((73 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 30)) { /* Only odd lanes reach case 1, so lane 30 never takes this branch; expected_bit_patterns has no records for sites 90 or 97. */ + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 37)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); /* Lanes 14, 22 and 56 tested below are even and never reach case 1, so sites 122 and 137 are never recorded. */ + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 56))) { + result = (result + 
WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { /* Leaves the loop after the second iteration even though the loop condition would allow a third. */ + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 5, 1430257664, 1344, 5, 1430257664, 1344, 5, 1430257664, 1344, 5, 1430257664, 1344, 5, 1430257664, 1344, 5, 1430257664, 1344, 5, 1430257664, 1360, 5, 1430257664, 1360, 5, 1430257664, 1360, 5, 1430257664, 1360, 5, 1430257664, 1360, 5, 1430257664, 1360, 5, 1430257664, 1360, 5, 1430257664, 3200, 4096, 4194304, 3200, 4096, 4194304, 3204, 4096, 4194304, 3204, 4096, 4194304, 3208, 4096, 4194304, 3208, 4096, 4194304, 3216, 4096, 4194304, 3216, 4096, 4194304, 3220, 4096, 4194304, 3220, 4096, 4194304, 3224, 4096, 4194304, 3224, 4096, 4194304, 4672, 17825796, 67108864, 4672, 17825796, 67108864, 4672, 17825796, 67108864, 4672, 17825796, 67108864, 4676, 17825796, 67108864, 4676, 17825796, 67108864, 4676, 17825796, 67108864, 4676, 17825796, 67108864, 4680, 17825796, 67108864, 4680, 17825796, 67108864, 4680, 17825796, 67108864, 4680, 17825796, 67108864, 4688, 17825796, 67108864, 4688, 17825796, 67108864, 4688, 17825796, 67108864, 4688, 17825796, 67108864, 4692, 
17825796, 67108864, 4692, 17825796, 67108864, 4692, 17825796, 67108864, 4692, 17825796, 67108864, 4696, 17825796, 67108864, 4696, 17825796, 67108864, 4696, 17825796, 67108864, 4696, 17825796, 67108864, 6656, 0, 2863311520, 6656, 0, 2863311520, 6656, 0, 2863311520, 6656, 0, 2863311520, 6656, 0, 2863311520, 6656, 0, 2863311520, 6656, 0, 2863311520, 6656, 0, 2863311520, 6656, 0, 2863311520, 6656, 0, 2863311520, 6656, 0, 2863311520, 6656, 0, 2863311520, 6656, 0, 2863311520, 6656, 0, 2863311520] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756442453373305181_385_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756442453373305181_385_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c2c39fe0 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756442453373305181_385_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,112 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Each guarded wave-op site appends a 3-uint record to _participant_bit: the site id shifted left by 6 (OR-ed with loop-counter bits inside loops), then ballot.x and ballot.y. InterlockedAdd on _wave_op_index[0] gives each participating lane its own 3-uint slot; the expected data lists one identical record per participating lane. */ + uint result = 0; + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 42))) { /* Everything below runs only for lanes 0-11 and 42-63; lanes 12-41 record nothing. */ + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 43))) { /* Of these three lanes only 43 passes the outer guard, which matches the single-bit mask (0, 2048) in the expected records for site 40. */ + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); /* Incremented before use, so the inner-iteration field recorded below takes the values 1..3. */ + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { /* Nested inside the odd-lane branch, so this even-lane test is never true; expected_bit_patterns has no records for site 72. */ + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 372 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 4095, 4026531840, 1088, 4095, 4026531840, 1088, 4095, 4026531840, 1088, 4095, 4026531840, 1088, 4095, 4026531840, 1088, 4095, 4026531840, 1088, 4095, 4026531840, 1088, 4095, 4026531840, 1088, 4095, 4026531840, 1088, 4095, 4026531840, 1088, 4095, 4026531840, 1088, 4095, 4026531840, 1088, 4095, 4026531840, 1088, 4095, 4026531840, 1088, 4095, 4026531840, 1088, 4095, 4026531840, 2560, 0, 2048, 2576, 0, 2048, 4036, 2730, 2863310848, 4036, 2730, 2863310848, 4036, 2730, 2863310848, 4036, 2730, 2863310848, 4036, 2730, 2863310848, 4036, 2730, 2863310848, 4036, 2730, 2863310848, 4036, 2730, 2863310848, 4036, 2730, 2863310848, 4036, 2730, 2863310848, 4036, 2730, 2863310848, 4036, 2730, 2863310848, 4036, 2730, 2863310848, 4036, 2730, 2863310848, 
4036, 2730, 2863310848, 4036, 2730, 2863310848, 4036, 2730, 2863310848, 4040, 2730, 2863310848, 4040, 2730, 2863310848, 4040, 2730, 2863310848, 4040, 2730, 2863310848, 4040, 2730, 2863310848, 4040, 2730, 2863310848, 4040, 2730, 2863310848, 4040, 2730, 2863310848, 4040, 2730, 2863310848, 4040, 2730, 2863310848, 4040, 2730, 2863310848, 4040, 2730, 2863310848, 4040, 2730, 2863310848, 4040, 2730, 2863310848, 4040, 2730, 2863310848, 4040, 2730, 2863310848, 4040, 2730, 2863310848, 4044, 2730, 2863310848, 4044, 2730, 2863310848, 4044, 2730, 2863310848, 4044, 2730, 2863310848, 4044, 2730, 2863310848, 4044, 2730, 2863310848, 4044, 2730, 2863310848, 4044, 2730, 2863310848, 4044, 2730, 2863310848, 4044, 2730, 2863310848, 4044, 2730, 2863310848, 4044, 2730, 2863310848, 4044, 2730, 2863310848, 4044, 2730, 2863310848, 4044, 2730, 2863310848, 4044, 2730, 2863310848, 4044, 2730, 2863310848, 4052, 2730, 2863310848, 4052, 2730, 2863310848, 4052, 2730, 2863310848, 4052, 2730, 2863310848, 4052, 2730, 2863310848, 4052, 2730, 2863310848, 4052, 2730, 2863310848, 4052, 2730, 2863310848, 4052, 2730, 2863310848, 4052, 2730, 2863310848, 4052, 2730, 2863310848, 4052, 2730, 2863310848, 4052, 2730, 2863310848, 4052, 2730, 2863310848, 4052, 2730, 2863310848, 4052, 2730, 2863310848, 4052, 2730, 2863310848, 4056, 2730, 2863310848, 4056, 2730, 2863310848, 4056, 2730, 2863310848, 4056, 2730, 2863310848, 4056, 2730, 2863310848, 4056, 2730, 2863310848, 4056, 2730, 2863310848, 4056, 2730, 2863310848, 4056, 2730, 2863310848, 4056, 2730, 2863310848, 4056, 2730, 2863310848, 4056, 2730, 2863310848, 4056, 2730, 2863310848, 4056, 2730, 2863310848, 4056, 2730, 2863310848, 4056, 2730, 2863310848, 4056, 2730, 2863310848, 4060, 2730, 2863310848, 4060, 2730, 2863310848, 4060, 2730, 2863310848, 4060, 2730, 2863310848, 4060, 2730, 2863310848, 4060, 2730, 2863310848, 4060, 2730, 2863310848, 4060, 2730, 2863310848, 4060, 2730, 2863310848, 4060, 2730, 2863310848, 4060, 2730, 2863310848, 4060, 2730, 2863310848, 4060, 
2730, 2863310848, 4060, 2730, 2863310848, 4060, 2730, 2863310848, 4060, 2730, 2863310848, 4060, 2730, 2863310848, 6208, 0, 2147516416, 6208, 0, 2147516416, 6224, 0, 2147516416, 6224, 0, 2147516416] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756442459366235868_386_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756442459366235868_386_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..97d7f32f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756442459366235868_386_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Each guarded wave-op site appends a 3-uint record to _participant_bit: the site id shifted left by 6, then ballot.x and ballot.y. InterlockedAdd on _wave_op_index[0] gives each participating lane its own 3-uint slot; the expected data lists one identical record per participating lane. */ + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { /* Every lane in this else branch is odd, so this test always passes and the nested else below (sites 51 and 45) is unreachable; expected_bit_patterns contains only sites 63 and 57. */ + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 
1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 4032, 1431655765, 1431655765, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530, 3648, 2863311530, 2863311530] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756442809848076260_390_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756442809848076260_390_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4bc3976f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756442809848076260_390_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,125 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = 
(result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 135 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 2960, 68174084, 1090785345, 2960, 68174084, 1090785345, 2960, 68174084, 1090785345, 2960, 68174084, 1090785345, 2960, 68174084, 1090785345, 2960, 68174084, 1090785345, 2960, 68174084, 1090785345, 2960, 68174084, 1090785345, 2960, 68174084, 1090785345, 2960, 68174084, 1090785345, 2960, 68174084, 1090785345, 2976, 68174084, 1090785345, 2976, 68174084, 1090785345, 2976, 68174084, 1090785345, 2976, 68174084, 1090785345, 2976, 68174084, 1090785345, 2976, 68174084, 1090785345, 2976, 68174084, 1090785345, 2976, 68174084, 1090785345, 2976, 68174084, 1090785345, 2976, 68174084, 1090785345, 2976, 68174084, 1090785345, 2992, 68174084, 1090785345, 2992, 68174084, 1090785345, 2992, 68174084, 1090785345, 2992, 68174084, 1090785345, 2992, 68174084, 1090785345, 
2992, 68174084, 1090785345, 2992, 68174084, 1090785345, 2992, 68174084, 1090785345, 2992, 68174084, 1090785345, 2992, 68174084, 1090785345, 2992, 68174084, 1090785345] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756442810448611085_391_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756442810448611085_391_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..db12b75b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756442810448611085_391_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,281 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 59))) { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 13)) { + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 16))) { + if ((((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 60))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 49)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 300 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1616, 286331153, 286331153, 1616, 286331153, 286331153, 1616, 286331153, 286331153, 1616, 286331153, 286331153, 1616, 286331153, 286331153, 1616, 286331153, 286331153, 1616, 286331153, 286331153, 1616, 286331153, 286331153, 1616, 286331153, 286331153, 1616, 286331153, 286331153, 1616, 286331153, 286331153, 1616, 286331153, 286331153, 1616, 286331153, 286331153, 1616, 286331153, 286331153, 1616, 286331153, 286331153, 1616, 286331153, 286331153, 2768, 78643, 805306368, 2768, 78643, 805306368, 2768, 78643, 805306368, 2768, 78643, 805306368, 2768, 78643, 805306368, 2768, 78643, 805306368, 2768, 78643, 805306368, 2768, 78643, 805306368, 2768, 78643, 805306368, 2768, 78643, 805306368, 2768, 78643, 805306368, 4432, 572662306, 572662306, 4432, 572662306, 572662306, 4432, 572662306, 572662306, 4432, 572662306, 572662306, 4432, 572662306, 572662306, 4432, 572662306, 572662306, 4432, 572662306, 572662306, 4432, 572662306, 572662306, 4432, 572662306, 572662306, 4432, 572662306, 572662306, 4432, 572662306, 572662306, 4432, 572662306, 572662306, 4432, 572662306, 572662306, 4432, 572662306, 572662306, 4432, 572662306, 572662306, 4432, 572662306, 572662306, 4928, 1145324612, 1145324612, 4928, 1145324612, 
1145324612, 4928, 1145324612, 1145324612, 4928, 1145324612, 1145324612, 4928, 1145324612, 1145324612, 4928, 1145324612, 1145324612, 4928, 1145324612, 1145324612, 4928, 1145324612, 1145324612, 4928, 1145324612, 1145324612, 4928, 1145324612, 1145324612, 4928, 1145324612, 1145324612, 4928, 1145324612, 1145324612, 4928, 1145324612, 1145324612, 4928, 1145324612, 1145324612, 4928, 1145324612, 1145324612, 4928, 1145324612, 1145324612, 5376, 559240, 0, 5376, 559240, 0, 5376, 559240, 0, 5376, 559240, 0, 5376, 559240, 0, 6016, 73, 0, 6016, 73, 0, 6016, 73, 0, 13056, 272696336, 68174084, 13056, 272696336, 68174084, 13056, 272696336, 68174084, 13056, 272696336, 68174084, 13056, 272696336, 68174084, 13056, 272696336, 68174084, 13056, 272696336, 68174084, 13056, 272696336, 68174084, 13056, 272696336, 68174084, 13056, 272696336, 68174084, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513, 13696, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756442816978640569_392_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756442816978640569_392_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1da5f6cf --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756442816978640569_392_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,99 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 50))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756442817621328897_393_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756442817621328897_393_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b59c1dae --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756442817621328897_393_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,112 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 49))) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 50))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 63)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 56)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5824, 0, 1426063360, 5824, 0, 1426063360, 5824, 0, 1426063360, 5824, 0, 1426063360, 5840, 0, 1426063360, 5840, 0, 1426063360, 5840, 0, 1426063360, 5840, 0, 1426063360] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756442858198426971_396_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756442858198426971_396_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8c03c910 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756442858198426971_396_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,142 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 22)) { + if ((WaveGetLaneIndex() == 44)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 60))) { + if 
(((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 42))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 63))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756442858372156701_397_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756442858372156701_397_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a55097c6 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756442858372156701_397_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,169 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 40)) { + if ((WaveGetLaneIndex() == 32)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
(((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 11))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(((134 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 81 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 3152, 2, 536870912, 3152, 2, 536870912, 3168, 2, 536870912, 3168, 2, 536870912, 3456, 537002016, 2097664, 3456, 537002016, 2097664, 3456, 537002016, 2097664, 3456, 537002016, 2097664, 3456, 537002016, 2097664, 3776, 1145324612, 1145324612, 3776, 1145324612, 1145324612, 3776, 1145324612, 1145324612, 3776, 1145324612, 1145324612, 3776, 1145324612, 1145324612, 3776, 1145324612, 1145324612, 3776, 1145324612, 1145324612, 3776, 1145324612, 1145324612, 3776, 1145324612, 1145324612, 3776, 1145324612, 1145324612, 3776, 1145324612, 1145324612, 3776, 1145324612, 1145324612, 3776, 1145324612, 1145324612, 3776, 1145324612, 1145324612, 3776, 1145324612, 1145324612, 3776, 1145324612, 1145324612] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756443305764619238_402_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756443305764619238_402_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..11765953 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756443305764619238_402_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,353 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 63)) { + if ((WaveGetLaneIndex() == 42)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 
3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 60)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((186 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() >= 43)) { + if ((WaveGetLaneIndex() >= 58)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((222 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 54))) { + 
result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((241 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter4 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((255 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + break; + } + case 3: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 93 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 4288, 545392672, 136348168, 4288, 545392672, 136348168, 4288, 545392672, 136348168, 4288, 545392672, 136348168, 4288, 545392672, 136348168, 4288, 545392672, 136348168, 4288, 
545392672, 136348168, 4288, 545392672, 136348168, 4288, 545392672, 136348168, 4288, 545392672, 136348168, 5504, 0, 1073741888, 5504, 0, 1073741888, 6144, 4, 0, 7872, 17, 0, 7872, 17, 0, 14212, 0, 1140850688, 14212, 0, 1140850688, 14228, 0, 1140850688, 14228, 0, 1140850688, 14244, 0, 1140850688, 14244, 0, 1140850688, 15428, 64, 0, 15444, 64, 0, 15460, 64, 0, 17424, 136, 0, 17424, 136, 0, 17440, 136, 0, 17440, 136, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756443338513855520_405_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756443338513855520_405_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..dafc476c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756443338513855520_405_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,119 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 22)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 31)) { + if ((WaveGetLaneIndex() >= 50)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | 
(i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 54))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 55)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((67 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 21)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((76 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i0 == 1)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 
3 # Same size as expected_bit_patterns; the BufferParticipantPattern check rejects mismatched buffer shapes + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756443338688891761_406_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756443338688891761_406_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a8095a09 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756443338688891761_406_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,116 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)]
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 60)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1920, 4, 0, 1536, 16, 8192, 1536, 16, 8192, 1152, 32768, 0, 2560, 85, 0, 2560, 85, 0, 2560, 85, 0, 2560, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756443346090149154_408_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756443346090149154_408_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..48ed53b0 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756443346090149154_408_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,386 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 62))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 56)) || 
(WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 18)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 45))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 48))) { + result = (result 
+ WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 51))) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((255 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 62))) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (328 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 28))) { + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (361 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (371 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (380 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (385 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (392 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (396 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (411 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (420 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 41)) || 
(WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (439 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 1073741824, 1280, 1, 1073741824, 4800, 84, 0, 4800, 84, 0, 4800, 84, 0, 7936, 8, 2181038080, 7936, 8, 2181038080, 7936, 8, 2181038080, 7952, 8, 2181038080, 7952, 8, 2181038080, 7952, 8, 2181038080, 7968, 8, 2181038080, 7968, 8, 2181038080, 7968, 8, 2181038080, 9856, 8, 2181038080, 9856, 8, 2181038080, 9856, 8, 2181038080, 9872, 8, 2181038080, 9872, 8, 2181038080, 9872, 8, 2181038080, 9888, 8, 2181038080, 9888, 8, 2181038080, 9888, 8, 2181038080, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 18944, 613566756, 1227133513, 26880, 81920, 1073741824, 26880, 81920, 1073741824, 26880, 81920, 1073741824, 28096, 65536, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756443454716953060_410_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756443454716953060_410_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d62ac337 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756443454716953060_410_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,110 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 45))) { + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 
3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((75 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3904, 1048576, 0, 3908, 1048576, 0, 3912, 1048576, 0, 3920, 1048576, 0, 3924, 1048576, 0, 3928, 1048576, 0, 3936, 1048576, 0, 3940, 1048576, 0, 3944, 1048576, 0, 4800, 1048576, 0, 4804, 1048576, 0, 4808, 1048576, 0, 4816, 1048576, 0, 4820, 1048576, 0, 4824, 1048576, 0, 4832, 1048576, 0, 4836, 1048576, 0, 4840, 1048576, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756443455120719363_411_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756443455120719363_411_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1c66952c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756443455120719363_411_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,370 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 19)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 50)) { + if ((WaveGetLaneIndex() == 38)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((112 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((139 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((158 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 54)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 
2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((230 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 39)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((237 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + } + case 1: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((266 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((275 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 52))) { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 16))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (331 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (336 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (355 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (368 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (377 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 51))) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (407 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((438 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 30)) || 
(WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (461 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1371 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3584, 0, 4, 3904, 32, 0, 6080, 258015, 0, 6080, 258015, 0, 6080, 258015, 0, 6080, 258015, 0, 6080, 258015, 0, 6080, 258015, 0, 6080, 258015, 0, 6080, 258015, 0, 6080, 258015, 0, 6080, 258015, 0, 6080, 258015, 0, 6080, 258015, 0, 6080, 258015, 0, 6080, 258015, 0, 6080, 258015, 0, 6080, 258015, 0, 11344, 268435712, 2056, 11344, 268435712, 2056, 11344, 268435712, 2056, 11344, 268435712, 2056, 11360, 268435712, 2056, 11360, 268435712, 2056, 11360, 268435712, 2056, 11360, 268435712, 2056, 11968, 0, 4257218560, 11968, 0, 4257218560, 11968, 0, 4257218560, 11968, 0, 4257218560, 11968, 0, 4257218560, 11968, 0, 4257218560, 11968, 0, 4257218560, 11968, 0, 4257218560, 11968, 0, 4257218560, 14740, 4096, 1073741824, 14740, 4096, 1073741824, 14744, 4096, 1073741824, 14744, 4096, 1073741824, 14748, 4096, 1073741824, 14748, 4096, 1073741824, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 
4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17040, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17044, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 
4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17048, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17056, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 
4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17060, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 
4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17064, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17072, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 
4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17076, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17080, 524287, 4294966272, 17616, 32, 0, 17632, 32, 0, 17648, 32, 0, 18752, 7, 3221225472, 18752, 7, 3221225472, 18752, 7, 3221225472, 18752, 7, 3221225472, 18752, 7, 3221225472, 21184, 0, 1048576, 21504, 4, 0, 23552, 3, 4286578688, 23552, 3, 4286578688, 23552, 3, 4286578688, 23552, 3, 4286578688, 23552, 3, 4286578688, 23552, 3, 4286578688, 23552, 3, 4286578688, 23552, 3, 4286578688, 23552, 3, 4286578688, 23552, 3, 4286578688, 23552, 3, 4286578688, 23552, 4, 0, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 
1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 24128, 1431655760, 349525, 29504, 0, 64] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756443615960091671_412_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756443615960091671_412_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5b36096c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756443615960091671_412_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,166 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 36)) { + if ((WaveGetLaneIndex() >= 39)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 0, 33554432, 1616, 0, 33554432, 1632, 0, 33554432, 2816, 134217728, 1024, 2816, 134217728, 1024, 2820, 134217728, 1024, 2820, 134217728, 1024, 2832, 134217728, 1024, 2832, 134217728, 1024, 2836, 134217728, 1024, 2836, 134217728, 1024, 2848, 134217728, 1024, 2848, 134217728, 1024, 2852, 134217728, 1024, 2852, 134217728, 1024, 3392, 272696336, 68174084, 3392, 272696336, 68174084, 3392, 272696336, 68174084, 3392, 272696336, 68174084, 3392, 272696336, 68174084, 3392, 272696336, 68174084, 3392, 272696336, 68174084, 3392, 272696336, 68174084, 3392, 272696336, 68174084, 3392, 272696336, 68174084, 4544, 0, 1227133440, 4544, 0, 1227133440, 4544, 0, 1227133440, 4544, 0, 1227133440, 4544, 0, 1227133440, 4544, 0, 1227133440, 4544, 0, 1227133440, 4544, 0, 1227133440, 4560, 0, 1227133440, 4560, 0, 1227133440, 4560, 0, 1227133440, 4560, 0, 1227133440, 4560, 0, 1227133440, 4560, 0, 1227133440, 4560, 0, 1227133440, 4560, 0, 1227133440, 4576, 0, 1227133440, 4576, 0, 1227133440, 4576, 0, 1227133440, 4576, 0, 1227133440, 4576, 0, 1227133440, 4576, 0, 1227133440, 4576, 0, 1227133440, 4576, 0, 1227133440, 4992, 0, 1227133504, 4992, 0, 1227133504, 4992, 0, 1227133504, 4992, 0, 1227133504, 4992, 0, 1227133504, 4992, 0, 1227133504, 4992, 0, 1227133504, 4992, 0, 1227133504, 4992, 0, 1227133504, 5008, 0, 1227133504, 5008, 0, 1227133504, 5008, 0, 1227133504, 5008, 0, 1227133504, 5008, 0, 1227133504, 5008, 0, 1227133504, 5008, 0, 1227133504, 
5008, 0, 1227133504, 5008, 0, 1227133504, 5024, 0, 1227133504, 5024, 0, 1227133504, 5024, 0, 1227133504, 5024, 0, 1227133504, 5024, 0, 1227133504, 5024, 0, 1227133504, 5024, 0, 1227133504, 5024, 0, 1227133504, 5024, 0, 1227133504, 5440, 256, 0, 5456, 256, 0, 5472, 256, 0, 6400, 8388608, 0, 6416, 8388608, 0, 6432, 8388608, 0, 7040, 85, 0, 7040, 85, 0, 7040, 85, 0, 7040, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756443636169428782_413_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756443636169428782_413_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2acad3fc --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756443636169428782_413_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,243 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 44))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 35))) { + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 62)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 44))) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 43))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - 
Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 2048, 16, 68173824, 2048, 16, 68173824, 2048, 16, 68173824, 2048, 16, 68173824, 4416, 16, 68157440, 4416, 16, 68157440, 4416, 16, 68157440, 5376, 1024, 0, 5392, 1024, 0, 5408, 1024, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756443637742848232_414_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756443637742848232_414_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3b9e48b3 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756443637742848232_414_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,97 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint 
counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2256, 0, 67108880, 2256, 0, 67108880, 3412, 341, 1431654400, 3412, 341, 1431654400, 3412, 341, 1431654400, 3412, 341, 1431654400, 3412, 341, 1431654400, 3412, 341, 1431654400, 3412, 341, 1431654400, 3412, 341, 1431654400, 3412, 341, 1431654400, 3412, 341, 1431654400, 3412, 341, 1431654400, 3412, 341, 1431654400, 3412, 341, 1431654400, 3412, 341, 1431654400, 3412, 341, 1431654400, 3416, 341, 1431654400, 3416, 341, 1431654400, 3416, 341, 1431654400, 3416, 341, 1431654400, 3416, 341, 1431654400, 3416, 341, 1431654400, 3416, 341, 1431654400, 3416, 341, 1431654400, 3416, 341, 1431654400, 3416, 341, 1431654400, 3416, 341, 1431654400, 3416, 341, 1431654400, 3416, 341, 1431654400, 3416, 341, 1431654400, 3416, 341, 1431654400] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756443638742081645_415_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756443638742081645_415_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bdb19b58 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756443638742081645_415_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,326 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 57))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 
58))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 62))) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 63))) { + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 52))) { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 42))) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((211 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((i1 == 1)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + uint counter4 = 0; + while ((counter4 < 2)) { + 
counter4 = (counter4 + 1); + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((265 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((284 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 37)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((291 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (311 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } 
+ case 2: { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 60))) { + if ((((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (341 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (360 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (371 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (381 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((403 << 6) | 
(counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (407 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 3: { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if ((WaveGetLaneIndex() < 23)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((428 << 6) | (counter6 << 4)) | (counter7 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [9856, 16, 1073741824, 9856, 16, 1073741824, 11328, 1, 286326784, 11328, 1, 286326784, 11328, 1, 286326784, 11328, 1, 286326784, 11328, 1, 286326784, 13504, 1, 286261248, 13504, 1, 286261248, 13504, 1, 286261248, 13504, 1, 286261248, 13508, 1, 286261248, 13508, 1, 286261248, 13508, 1, 286261248, 13508, 1, 286261248, 13520, 1, 286261248, 13520, 1, 286261248, 13520, 1, 286261248, 13520, 1, 286261248, 13524, 1, 286261248, 13524, 1, 286261248, 13524, 1, 286261248, 13524, 1, 286261248, 16980, 4096, 0, 16984, 4096, 0, 16996, 4096, 0, 17000, 4096, 0, 17012, 4096, 0, 17016, 4096, 0, 18640, 0, 256, 18656, 0, 256, 18672, 0, 256, 19328, 272, 0, 19328, 272, 0, 24384, 68, 0, 24384, 68, 0, 27412, 
559240, 0, 27412, 559240, 0, 27412, 559240, 0, 27412, 559240, 0, 27412, 559240, 0, 27416, 559240, 0, 27416, 559240, 0, 27416, 559240, 0, 27416, 559240, 0, 27416, 559240, 0, 27428, 559240, 0, 27428, 559240, 0, 27428, 559240, 0, 27428, 559240, 0, 27428, 559240, 0, 27432, 559240, 0, 27432, 559240, 0, 27432, 559240, 0, 27432, 559240, 0, 27432, 559240, 0, 27444, 559240, 0, 27444, 559240, 0, 27444, 559240, 0, 27444, 559240, 0, 27444, 559240, 0, 27448, 559240, 0, 27448, 559240, 0, 27448, 559240, 0, 27448, 559240, 0, 27448, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756443672843637163_416_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756443672843637163_416_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..81369783 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756443672843637163_416_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,182 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 53))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 49)) { + if ((WaveGetLaneIndex() == 53)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((96 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } + if ((WaveGetLaneIndex() == 20)) { + result = (result + 
WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 81 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1664, 81, 0, 1664, 81, 0, 1664, 81, 0, 3200, 1755, 3066036224, 3200, 1755, 3066036224, 3200, 1755, 3066036224, 3200, 1755, 3066036224, 3200, 1755, 3066036224, 3200, 1755, 3066036224, 3200, 1755, 3066036224, 3200, 1755, 3066036224, 3200, 1755, 3066036224, 3200, 1755, 3066036224, 3200, 1755, 3066036224, 3200, 1755, 3066036224, 3200, 1755, 3066036224, 3200, 1755, 3066036224, 3200, 1755, 3066036224, 4112, 67108864, 0, 4128, 67108864, 0, 4144, 67108864, 0, 6928, 1048576, 0, 6944, 1048576, 0, 6960, 1048576, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756443695057222883_418_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756443695057222883_418_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d91be4c6 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756443695057222883_418_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 17)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 135 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3136, 4195328, 4456512, 3136, 4195328, 4456512, 3136, 4195328, 4456512, 3136, 4195328, 4456512, 3136, 4195328, 4456512, 2752, 31743, 0, 2752, 31743, 0, 2752, 31743, 0, 2752, 31743, 0, 2752, 31743, 0, 2752, 31743, 0, 2752, 31743, 0, 2752, 31743, 0, 2752, 31743, 0, 2752, 31743, 0, 2752, 31743, 0, 2752, 31743, 0, 2752, 31743, 0, 2752, 31743, 0, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2496, 2863300608, 2863311530, 2112, 65536, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 
+ - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756443695532976505_419_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756443695532976505_419_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..469855be --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756443695532976505_419_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,206 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 51))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((95 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 
1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 61))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter2 == 1)) { + break; + } + } + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 159 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1984, 262144, 128, 1984, 262144, 128, 2000, 262144, 128, 2000, 262144, 128, 2016, 262144, 128, 2016, 262144, 128, 5504, 262144, 0, 5508, 262144, 0, 5512, 262144, 0, 5520, 262144, 0, 5524, 262144, 0, 5528, 262144, 0, 5536, 262144, 0, 5540, 262144, 0, 5544, 262144, 0, 6080, 262144, 0, 6084, 262144, 0, 6088, 262144, 0, 6096, 262144, 0, 6100, 262144, 0, 6104, 262144, 0, 6112, 262144, 0, 6116, 262144, 0, 6120, 262144, 0, 7808, 272696336, 68174084, 7808, 272696336, 68174084, 7808, 272696336, 68174084, 7808, 272696336, 68174084, 7808, 272696336, 68174084, 7808, 272696336, 68174084, 7808, 272696336, 68174084, 7808, 272696336, 68174084, 7808, 272696336, 68174084, 7808, 272696336, 68174084, 9744, 545392672, 136348168, 9744, 545392672, 136348168, 9744, 545392672, 136348168, 9744, 545392672, 136348168, 9744, 545392672, 136348168, 9744, 545392672, 136348168, 9744, 545392672, 136348168, 9744, 545392672, 136348168, 9744, 545392672, 136348168, 9744, 545392672, 136348168, 13648, 537004064, 136348168, 13648, 537004064, 136348168, 13648, 537004064, 136348168, 13648, 537004064, 136348168, 13648, 537004064, 136348168, 13648, 537004064, 136348168, 13648, 537004064, 136348168, 13648, 537004064, 136348168, 13648, 537004064, 136348168] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756443700184280713_420_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756443700184280713_420_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fa5157f1 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756443700184280713_420_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,591 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 53))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((73 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 60))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 19)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 31)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter2 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 34))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 
44)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (280 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (294 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (301 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } 
+ case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 30)) { + if ((WaveGetLaneIndex() >= 54)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((318 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 25)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((325 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((344 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (363 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (374 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (381 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 56))) { + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (399 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (404 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (414 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 24)) { + if ((WaveGetLaneIndex() == 41)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (426 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((440 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((454 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 17)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((461 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (470 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (480 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (489 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (494 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (498 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (507 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (512 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (519 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (523 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: 
Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 435 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4676, 0, 4096, 4680, 0, 4096, 4684, 0, 4096, 4692, 0, 4096, 4696, 0, 4096, 4700, 0, 4096, 6720, 2, 536870912, 6720, 2, 536870912, 9168, 0, 35790848, 9168, 0, 35790848, 9168, 0, 35790848, 9168, 0, 35790848, 9184, 0, 35790848, 9184, 0, 35790848, 9184, 0, 35790848, 9184, 0, 35790848, 12560, 32, 35651584, 12560, 32, 35651584, 12560, 32, 35651584, 12576, 32, 35651584, 12576, 32, 35651584, 12576, 32, 35651584, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 13824, 1717986918, 1717986918, 14272, 978670, 0, 14272, 978670, 0, 14272, 978670, 0, 14272, 978670, 0, 14272, 978670, 0, 14272, 978670, 0, 14272, 978670, 0, 14272, 978670, 0, 14272, 978670, 0, 14272, 978670, 0, 14272, 978670, 0, 14272, 978670, 0, 14272, 978670, 0, 14272, 978670, 0, 14272, 978670, 0, 15168, 17, 0, 15168, 17, 0, 17280, 0, 572653568, 17280, 0, 572653568, 17280, 0, 572653568, 
17280, 0, 572653568, 20816, 4473924, 0, 20816, 4473924, 0, 20816, 4473924, 0, 20816, 4473924, 0, 20816, 4473924, 0, 20816, 4473924, 0, 20832, 4473924, 0, 20832, 4473924, 0, 20832, 4473924, 0, 20832, 4473924, 0, 20832, 4473924, 0, 20832, 4473924, 0, 22032, 0, 262144, 22048, 0, 262144, 23232, 134250504, 2148008064, 23232, 134250504, 2148008064, 23232, 134250504, 2148008064, 23232, 134250504, 2148008064, 23232, 134250504, 2148008064, 23232, 134250504, 2148008064, 25536, 128, 0, 25856, 8390656, 134250504, 25856, 8390656, 134250504, 25856, 8390656, 134250504, 25856, 8390656, 134250504, 25856, 8390656, 134250504, 26496, 17, 0, 26496, 17, 0, 31616, 537002016, 2097664, 31616, 537002016, 2097664, 31616, 537002016, 2097664, 31616, 537002016, 2097664, 31616, 537002016, 2097664, 32448, 572662306, 572662306, 32448, 572662306, 572662306, 32448, 572662306, 572662306, 32448, 572662306, 572662306, 32448, 572662306, 572662306, 32448, 572662306, 572662306, 32448, 572662306, 572662306, 32448, 572662306, 572662306, 32448, 572662306, 572662306, 32448, 572662306, 572662306, 32448, 572662306, 572662306, 32448, 572662306, 572662306, 32448, 572662306, 572662306, 32448, 572662306, 572662306, 32448, 572662306, 572662306, 32448, 572662306, 572662306, 32768, 1145324612, 1145324612, 32768, 1145324612, 1145324612, 32768, 1145324612, 1145324612, 32768, 1145324612, 1145324612, 32768, 1145324612, 1145324612, 32768, 1145324612, 1145324612, 32768, 1145324612, 1145324612, 32768, 1145324612, 1145324612, 32768, 1145324612, 1145324612, 32768, 1145324612, 1145324612, 32768, 1145324612, 1145324612, 32768, 1145324612, 1145324612, 32768, 1145324612, 1145324612, 32768, 1145324612, 1145324612, 32768, 1145324612, 1145324612, 32768, 1145324612, 1145324612, 33216, 559240, 0, 33216, 559240, 0, 33216, 559240, 0, 33216, 559240, 0, 33216, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756443745892365882_421_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756443745892365882_421_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fac1dd2e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756443745892365882_421_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((35 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 93 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2256, 0, 128, 2260, 0, 128, 2264, 0, 128, 2272, 0, 128, 2276, 0, 128, 2280, 0, 128, 2288, 0, 128, 2292, 0, 128, 2296, 0, 128, 3216, 8192, 2, 3216, 8192, 2, 3220, 8192, 2, 3220, 8192, 2, 3224, 8192, 2, 3224, 8192, 2, 3232, 8192, 2, 3232, 8192, 2, 3236, 8192, 2, 3236, 8192, 2, 3240, 8192, 2, 3240, 8192, 2, 3248, 8192, 2, 3248, 8192, 2, 3252, 8192, 2, 3252, 8192, 2, 3256, 8192, 2, 3256, 8192, 2] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 
0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756443812461053585_425_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756443812461053585_425_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b54b00e7 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756443812461053585_425_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,277 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 37)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter2 = 0; + while 
((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 52))) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((216 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 35)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((225 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 387 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 3664, 16, 0, 3680, 16, 0, 4368, 1040, 67108864, 4368, 1040, 67108864, 4368, 1040, 67108864, 4384, 1040, 67108864, 4384, 1040, 67108864, 4384, 1040, 67108864, 5184, 68174084, 1090785345, 5184, 68174084, 1090785345, 5184, 68174084, 1090785345, 5184, 68174084, 1090785345, 5184, 68174084, 1090785345, 5184, 68174084, 1090785345, 5184, 68174084, 1090785345, 5184, 68174084, 1090785345, 5184, 68174084, 1090785345, 5184, 68174084, 1090785345, 5184, 68174084, 1090785345, 7120, 87381, 1430257664, 7120, 87381, 1430257664, 7120, 87381, 1430257664, 7120, 87381, 1430257664, 7120, 87381, 1430257664, 7120, 87381, 1430257664, 7120, 87381, 1430257664, 7120, 87381, 1430257664, 7120, 87381, 1430257664, 7120, 87381, 1430257664, 7120, 87381, 1430257664, 7120, 87381, 1430257664, 7120, 87381, 1430257664, 7120, 87381, 1430257664, 7136, 87381, 1430257664, 7136, 87381, 1430257664, 7136, 87381, 1430257664, 7136, 87381, 1430257664, 7136, 87381, 1430257664, 7136, 87381, 1430257664, 7136, 87381, 1430257664, 7136, 87381, 1430257664, 7136, 87381, 1430257664, 7136, 87381, 1430257664, 7136, 87381, 1430257664, 7136, 87381, 1430257664, 7136, 87381, 1430257664, 7136, 87381, 1430257664, 8272, 21, 1430257664, 8272, 21, 1430257664, 8272, 21, 1430257664, 8272, 21, 1430257664, 8272, 21, 1430257664, 8272, 21, 1430257664, 8272, 21, 1430257664, 8272, 21, 1430257664, 8288, 21, 1430257664, 8288, 21, 1430257664, 8288, 21, 1430257664, 8288, 21, 1430257664, 8288, 21, 1430257664, 8288, 21, 1430257664, 8288, 21, 1430257664, 8288, 21, 1430257664, 8976, 21, 1431306240, 8976, 21, 1431306240, 8976, 21, 1431306240, 8976, 21, 1431306240, 8976, 21, 1431306240, 8976, 21, 1431306240, 8976, 21, 1431306240, 8976, 21, 1431306240, 8976, 21, 1431306240, 8992, 21, 1431306240, 8992, 21, 1431306240, 8992, 21, 1431306240, 8992, 21, 1431306240, 8992, 21, 1431306240, 8992, 21, 1431306240, 8992, 21, 1431306240, 8992, 21, 1431306240, 8992, 21, 1431306240, 
11664, 16384, 262144, 11664, 16384, 262144, 11680, 16384, 262144, 11680, 16384, 262144, 12368, 21845, 1431568384, 12368, 21845, 1431568384, 12368, 21845, 1431568384, 12368, 21845, 1431568384, 12368, 21845, 1431568384, 12368, 21845, 1431568384, 12368, 21845, 1431568384, 12368, 21845, 1431568384, 12368, 21845, 1431568384, 12368, 21845, 1431568384, 12368, 21845, 1431568384, 12368, 21845, 1431568384, 12368, 21845, 1431568384, 12368, 21845, 1431568384, 12368, 21845, 1431568384, 12384, 21845, 1431568384, 12384, 21845, 1431568384, 12384, 21845, 1431568384, 12384, 21845, 1431568384, 12384, 21845, 1431568384, 12384, 21845, 1431568384, 12384, 21845, 1431568384, 12384, 21845, 1431568384, 12384, 21845, 1431568384, 12384, 21845, 1431568384, 12384, 21845, 1431568384, 12384, 21845, 1431568384, 12384, 21845, 1431568384, 12384, 21845, 1431568384, 12384, 21845, 1431568384, 14420, 0, 8, 14424, 0, 8, 14428, 0, 8, 14436, 0, 8, 14440, 0, 8, 14444, 0, 8, 14452, 0, 8, 14456, 0, 8, 14460, 0, 8, 14864, 134217728, 0, 14880, 134217728, 0, 14896, 134217728, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756443959999293407_427_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756443959999293407_427_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7dab48ec --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756443959999293407_427_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,77 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((26 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 264 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1684, 1, 4294965248, 1684, 1, 4294965248, 1684, 1, 4294965248, 1684, 1, 4294965248, 1684, 1, 4294965248, 1684, 1, 4294965248, 1684, 1, 4294965248, 1684, 1, 4294965248, 1684, 1, 4294965248, 1684, 1, 4294965248, 1684, 1, 4294965248, 1684, 1, 4294965248, 1684, 1, 4294965248, 1684, 1, 4294965248, 1684, 1, 4294965248, 1684, 1, 4294965248, 1684, 
1, 4294965248, 1684, 1, 4294965248, 1684, 1, 4294965248, 1684, 1, 4294965248, 1684, 1, 4294965248, 1684, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1688, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1700, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248, 1704, 1, 4294965248] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + 
Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756443963092712483_428_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756443963092712483_428_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a7e0d2ce --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756443963092712483_428_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,316 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 63))) { + if ((((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 51))) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if 
(((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 52))) { + if (((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((184 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((211 << 6) 
| (counter0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 59))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (257 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((291 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((310 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((329 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (336 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1984, 16777728, 1024, 1984, 16777728, 1024, 1984, 16777728, 1024, 4352, 272696336, 68174084, 4352, 272696336, 68174084, 4352, 272696336, 68174084, 4352, 272696336, 68174084, 4352, 272696336, 68174084, 4352, 272696336, 68174084, 4352, 272696336, 
68174084, 4352, 272696336, 68174084, 4352, 272696336, 68174084, 4352, 272696336, 68174084, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 7680, 613566756, 1227133513, 10896, 16384, 1048576, 10896, 16384, 1048576, 10912, 16384, 1048576, 10912, 16384, 1048576, 14720, 17, 0, 14720, 17, 0, 16448, 4, 1073741824, 16448, 4, 1073741824, 19844, 0, 128, 19848, 0, 128, 19860, 0, 128, 19864, 0, 128, 21056, 524288, 2048, 21056, 524288, 2048, 21072, 524288, 2048, 21072, 524288, 2048] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444032615133282_433_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444032615133282_433_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4635660f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444032615133282_433_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,373 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 40))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 49))) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((284 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((299 << 6) | (i3 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 26))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (332 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((359 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (382 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (393 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + 
case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (403 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (412 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (417 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (424 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (435 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 303 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 9920, 
272696336, 68174084, 9920, 272696336, 68174084, 9920, 272696336, 68174084, 9920, 272696336, 68174084, 9920, 272696336, 68174084, 9920, 272696336, 68174084, 9920, 272696336, 68174084, 9920, 272696336, 68174084, 9920, 272696336, 68174084, 9920, 272696336, 68174084, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 14848, 613566756, 1227133513, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 25152, 2326440618, 2863311530, 27136, 559240, 0, 27136, 559240, 0, 27136, 559240, 0, 27136, 559240, 0, 27136, 559240, 0, 27840, 2326440618, 2863311530, 27840, 2326440618, 
2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530, 27840, 2326440618, 2863311530] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444034407814212_434_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444034407814212_434_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cb4437f6 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444034407814212_434_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,127 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 48))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 63))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 63)) { + 
result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((85 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((92 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (counter0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6992, 131074, 0, 6992, 131074, 0, 6996, 131074, 0, 6996, 131074, 0, 7008, 131074, 0, 7008, 131074, 0, 7012, 131074, 0, 7012, 131074, 0] + - Name: _wave_op_index + 
Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444034719925064_435_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444034719925064_435_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d23ff819 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444034719925064_435_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,198 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 61)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 61)) { + if ((WaveGetLaneIndex() == 38)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 59))) { + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 41)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i1 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 29)) { + if ((WaveGetLaneIndex() == 57)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main 
+ DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 0, 536870912, 992, 0, 536870912, 2896, 0, 536870912, 2912, 0, 536870912, 3792, 32768, 0, 3808, 32768, 0, 4800, 85, 0, 4800, 85, 0, 4800, 85, 0, 4800, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444036747999997_436_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444036747999997_436_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..de9a9d47 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444036747999997_436_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,335 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 53))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMin(7)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 11)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((217 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((232 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((253 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((297 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((335 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() 
% 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((345 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((354 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((373 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((384 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 873 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3200, 8388608, 2097160, 3200, 8388608, 2097160, 3200, 8388608, 2097160, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 
2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 5504, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6672, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 
2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 6688, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7568, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 
2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 7584, 2854922922, 2861214370, 8272, 21, 1073741824, 8272, 21, 1073741824, 8272, 21, 1073741824, 8272, 21, 1073741824, 8288, 21, 1073741824, 8288, 21, 1073741824, 8288, 21, 1073741824, 8288, 21, 1073741824, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8848, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 
1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 8864, 1431655765, 1431655765, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 9408, 2854922922, 2861214370, 10048, 73, 0, 10048, 73, 0, 10048, 73, 0, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 17152, 613566756, 1227133513, 19008, 2147483664, 16785408, 19008, 2147483664, 16785408, 19008, 2147483664, 16785408, 19008, 2147483664, 16785408, 19024, 
2147483664, 16785408, 19024, 2147483664, 16785408, 19024, 2147483664, 16785408, 19024, 2147483664, 16785408, 19040, 2147483664, 16785408, 19040, 2147483664, 16785408, 19040, 2147483664, 16785408, 19040, 2147483664, 16785408, 24576, 65536, 1, 24576, 65536, 1, 24592, 65536, 1, 24592, 65536, 1, 24608, 65536, 1, 24608, 65536, 1] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444056343344482_437_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444056343344482_437_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6e7f5af2 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444056343344482_437_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,145 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 46))) { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() == 54)) { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 50)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 15 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4672, 0, 16384, 5312, 85, 0, 5312, 85, 0, 5312, 85, 0, 5312, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444096247580873_439_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444096247580873_439_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..74a07dbe --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444096247580873_439_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,134 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 62))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: 
{ + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 45))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 1, 0, 2304, 4161, 0, 2304, 4161, 0, 2304, 4161, 0, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 4160, 260, 1090519040, 4160, 260, 1090519040, 4160, 260, 1090519040, 4160, 260, 1090519040, 4864, 260, 1090519040, 4864, 260, 1090519040, 4864, 260, 1090519040, 4864, 260, 1090519040] + - Name: _wave_op_index + Format: UInt32 + 
Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444112229559239_441_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444112229559239_441_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..be125e0c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444112229559239_441_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,333 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 29)) { + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 55))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 21)) { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((202 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 2)) { + break; + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 18))) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((279 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = 
(result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((288 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((315 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((337 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((360 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((375 << 6) | (i5 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (382 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (391 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 456 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 136348168, 2181570690, 1024, 136348168, 2181570690, 1024, 136348168, 2181570690, 1024, 136348168, 2181570690, 1024, 136348168, 2181570690, 1024, 136348168, 2181570690, 1024, 136348168, 2181570690, 1024, 136348168, 2181570690, 1024, 136348168, 2181570690, 1024, 136348168, 2181570690, 1024, 136348168, 2181570690, 1792, 520, 0, 1792, 520, 0, 2368, 136348168, 2181570690, 2368, 136348168, 2181570690, 2368, 136348168, 2181570690, 2368, 136348168, 2181570690, 2368, 136348168, 2181570690, 2368, 136348168, 2181570690, 2368, 136348168, 2181570690, 2368, 136348168, 2181570690, 2368, 136348168, 2181570690, 2368, 136348168, 2181570690, 2368, 136348168, 2181570690, 2944, 1090785345, 272696336, 2944, 1090785345, 272696336, 2944, 1090785345, 272696336, 2944, 1090785345, 272696336, 2944, 1090785345, 272696336, 2944, 1090785345, 272696336, 2944, 1090785345, 272696336, 2944, 1090785345, 272696336, 2944, 
1090785345, 272696336, 2944, 1090785345, 272696336, 2944, 1090785345, 272696336, 4112, 65, 0, 4112, 65, 0, 4128, 65, 0, 4128, 65, 0, 4816, 1, 272696320, 4816, 1, 272696320, 4816, 1, 272696320, 4816, 1, 272696320, 4816, 1, 272696320, 4832, 1, 272696320, 4832, 1, 272696320, 4832, 1, 272696320, 4832, 1, 272696320, 4832, 1, 272696320, 8144, 32768, 0, 8160, 32768, 0, 8176, 32768, 0, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 9536, 613566756, 1227133513, 10432, 2097152, 0, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 11008, 2861214378, 2863311530, 20160, 67108864, 1073741824, 
20160, 67108864, 1073741824, 20176, 67108864, 1073741824, 20176, 67108864, 1073741824, 21572, 1024, 0, 21576, 1024, 0, 21588, 1024, 0, 21592, 1024, 0, 24000, 0, 16384, 24016, 0, 16384, 24448, 559240, 0, 24448, 559240, 0, 24448, 559240, 0, 24448, 559240, 0, 24448, 559240, 0, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444271124384696_442_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444271124384696_442_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..252a1dff --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444271124384696_442_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,290 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 50))) { + if (((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 51))) { + result = (result + 
WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 47))) { + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 38)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((178 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + } + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 55))) { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() >= 49)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((220 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() >= 43)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((234 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 54)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((241 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() 
== 53))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [9408, 572662304, 8738, 9408, 572662304, 8738, 9408, 572662304, 8738, 9408, 572662304, 8738, 9408, 572662304, 8738, 9408, 572662304, 8738, 9408, 572662304, 8738, 9408, 572662304, 8738, 9408, 572662304, 8738, 9408, 572662304, 8738, 9408, 572662304, 8738, 11396, 0, 8704, 11396, 0, 8704, 11412, 0, 8704, 11412, 0, 8704, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 12480, 1145324612, 1145324612, 14096, 0, 8388608, 14112, 0, 8388608, 14128, 0, 8388608, 14996, 0, 8388608, 15000, 0, 8388608, 15004, 0, 8388608, 15012, 0, 8388608, 15016, 0, 8388608, 15020, 0, 8388608, 15028, 0, 8388608, 15032, 0, 8388608, 15036, 0, 8388608, 15444, 0, 8388608, 15448, 0, 8388608, 15452, 0, 8388608, 15460, 0, 8388608, 15464, 0, 8388608, 15468, 0, 8388608, 15476, 0, 8388608, 15480, 0, 8388608, 15484, 0, 8388608] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444307137495905_444_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444307137495905_444_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e8ce2ae2 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444307137495905_444_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444310853665501_446_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444310853665501_446_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cdea3704 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444310853665501_446_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,117 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444311692854246_447_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444311692854246_447_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..55403281 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444311692854246_447_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,322 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 30)) { + if ((WaveGetLaneIndex() >= 61)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 46)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 46))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 45)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 56))) { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((217 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1408, 17, 0, 1408, 17, 0, 2432, 286331153, 0, 2432, 286331153, 0, 2432, 286331153, 0, 2432, 286331153, 0, 2432, 286331153, 0, 2432, 286331153, 0, 2432, 286331153, 0, 2432, 286331153, 0, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3008, 286331153, 286331153, 3328, 1145324612, 1145324612, 3328, 1145324612, 1145324612, 3328, 1145324612, 1145324612, 3328, 1145324612, 1145324612, 3328, 1145324612, 1145324612, 3328, 1145324612, 1145324612, 3328, 1145324612, 1145324612, 3328, 1145324612, 
1145324612, 3328, 1145324612, 1145324612, 3328, 1145324612, 1145324612, 3328, 1145324612, 1145324612, 3328, 1145324612, 1145324612, 3328, 1145324612, 1145324612, 3328, 1145324612, 1145324612, 3328, 1145324612, 1145324612, 3328, 1145324612, 1145324612, 3776, 838860, 0, 3776, 838860, 0, 3776, 838860, 0, 3776, 838860, 0, 3776, 838860, 0, 3776, 838860, 0, 3776, 838860, 0, 3776, 838860, 0, 3776, 838860, 0, 3776, 838860, 0, 9536, 0, 1431650304, 9536, 0, 1431650304, 9536, 0, 1431650304, 9536, 0, 1431650304, 9536, 0, 1431650304, 9536, 0, 1431650304, 9536, 0, 1431650304, 9536, 0, 1431650304, 9536, 0, 1431650304, 10176, 17, 0, 10176, 17, 0, 13904, 1024, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444322169459063_449_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444322169459063_449_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7ce7e3d6 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444322169459063_449_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,102 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 105 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272, 2432, 511, 4294966272] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444322526489294_450_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444322526489294_450_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c35a33a8 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444322526489294_450_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,370 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 0))) { + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((207 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((224 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 45)) { + if ((WaveGetLaneIndex() >= 48)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 56)) { + if 
((WaveGetLaneIndex() >= 39)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((291 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 62)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((298 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((305 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 1)) { + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (324 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((339 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((346 << 6) | (i5 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((369 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 1)) { + continue; + } + if ((i6 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (382 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 174 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3456, 0, 1024, 9744, 8192, 0, 9760, 8192, 0, 9776, 8192, 0, 13268, 256, 1073741824, 13268, 256, 1073741824, 13272, 256, 1073741824, 13272, 256, 1073741824, 13276, 256, 1073741824, 13276, 256, 1073741824, 13284, 256, 1073741824, 13284, 256, 1073741824, 13288, 256, 1073741824, 13288, 256, 1073741824, 13292, 256, 1073741824, 13292, 256, 1073741824, 14356, 16384, 0, 14360, 16384, 0, 14364, 16384, 0, 14372, 16384, 0, 14376, 16384, 0, 14380, 16384, 0, 15424, 0, 2454257664, 15424, 0, 2454257664, 15424, 0, 2454257664, 15424, 0, 2454257664, 15424, 0, 2454257664, 15424, 0, 2454257664, 17984, 272696336, 68174084, 17984, 272696336, 68174084, 17984, 272696336, 68174084, 17984, 272696336, 68174084, 17984, 272696336, 68174084, 17984, 
272696336, 68174084, 17984, 272696336, 68174084, 17984, 272696336, 68174084, 17984, 272696336, 68174084, 17984, 272696336, 68174084, 18000, 272696336, 68174084, 18000, 272696336, 68174084, 18000, 272696336, 68174084, 18000, 272696336, 68174084, 18000, 272696336, 68174084, 18000, 272696336, 68174084, 18000, 272696336, 68174084, 18000, 272696336, 68174084, 18000, 272696336, 68174084, 18000, 272696336, 68174084, 18624, 0, 603979776, 18624, 0, 603979776, 18640, 0, 603979776, 18640, 0, 603979776, 21696, 4, 0, 21712, 4, 0, 21728, 4, 0, 23616, 0, 512, 23632, 0, 512, 24448, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444371480459010_451_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444371480459010_451_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6577339a --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444371480459010_451_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,318 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 57)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((101 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 56))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 58))) { + result = 
(result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((138 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 57)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 50))) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((243 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 288 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2048, 17, 0, 2048, 17, 0, 3008, 0, 570425344, 3008, 0, 570425344, 3024, 0, 570425344, 3024, 0, 570425344, 3040, 0, 570425344, 3040, 0, 570425344, 3456, 572662306, 0, 3456, 572662306, 0, 3456, 572662306, 0, 3456, 572662306, 0, 3456, 572662306, 0, 3456, 572662306, 0, 3456, 572662306, 0, 3456, 572662306, 0, 3472, 572662306, 0, 3472, 572662306, 0, 3472, 572662306, 0, 3472, 572662306, 0, 3472, 572662306, 0, 3472, 572662306, 0, 3472, 572662306, 0, 3472, 572662306, 0, 3488, 572662306, 0, 3488, 572662306, 0, 3488, 572662306, 0, 3488, 572662306, 0, 3488, 572662306, 0, 3488, 572662306, 0, 3488, 572662306, 0, 3488, 572662306, 0, 6480, 0, 64, 6484, 0, 64, 6496, 0, 64, 6500, 0, 64, 6512, 0, 64, 6516, 0, 64, 8848, 0, 4194304, 8852, 0, 4194304, 8864, 0, 4194304, 8868, 0, 4194304, 8880, 
0, 4194304, 8884, 0, 4194304, 10624, 8, 2147483648, 10624, 8, 2147483648, 14272, 8388608, 0, 14288, 8388608, 0, 14304, 8388608, 0, 15556, 0, 134217728, 15560, 0, 134217728, 15564, 0, 134217728, 15572, 0, 134217728, 15576, 0, 134217728, 15580, 0, 134217728, 15588, 0, 134217728, 15592, 0, 134217728, 15596, 0, 134217728, 16192, 73, 0, 16192, 73, 0, 16192, 73, 0, 16768, 272696336, 68174084, 16768, 272696336, 68174084, 16768, 272696336, 68174084, 16768, 272696336, 68174084, 16768, 272696336, 68174084, 16768, 272696336, 68174084, 16768, 272696336, 68174084, 16768, 272696336, 68174084, 16768, 272696336, 68174084, 16768, 272696336, 68174084, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513, 17088, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444449439332629_453_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444449439332629_453_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..41641d31 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444449439332629_453_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,196 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 
+ 1); + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((85 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 16))) { + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 34)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 
5444, 1048832, 0, 5444, 1048832, 0, 5448, 1048832, 0, 5448, 1048832, 0, 5460, 1048832, 0, 5460, 1048832, 0, 5464, 1048832, 0, 5464, 1048832, 0, 5476, 1048832, 0, 5476, 1048832, 0, 5480, 1048832, 0, 5480, 1048832, 0, 8640, 65536, 0, 8656, 65536, 0, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11776, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292, 11792, 0, 4294967292] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444466783052935_454_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444466783052935_454_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6f05fa91 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444466783052935_454_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,126 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 48)) { + if ((WaveGetLaneIndex() >= 49)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 73, 0, 768, 73, 0, 768, 73, 0, 2304, 272696336, 68174084, 2304, 272696336, 68174084, 2304, 272696336, 68174084, 2304, 272696336, 68174084, 2304, 272696336, 68174084, 2304, 272696336, 68174084, 2304, 272696336, 68174084, 2304, 272696336, 68174084, 2304, 272696336, 68174084, 2304, 272696336, 68174084, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513, 2624, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + 
Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444476402947275_456_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444476402947275_456_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..863547c0 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444476402947275_456_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2240, 262272, 16777248, 2240, 262272, 16777248, 2240, 262272, 16777248, 2240, 262272, 16777248, 1856, 64, 8256, 1856, 64, 8256, 1856, 64, 8256] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444476631297953_457_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444476631297953_457_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..53c93eda --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444476631297953_457_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + 
} + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + 
GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444521131535937_460_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444521131535937_460_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cebb53a1 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444521131535937_460_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,420 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 44)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 58)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 63))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 
59)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 59)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 57))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 23)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((226 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 19)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((281 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 41))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((307 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((316 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + } + break; + } + case 2: { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 32)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((333 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((353 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((367 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((378 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((389 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter6 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((399 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 285 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 73, 0, 768, 73, 0, 768, 73, 0, 3776, 272696336, 68174084, 3776, 272696336, 68174084, 3776, 272696336, 68174084, 3776, 272696336, 68174084, 3776, 272696336, 68174084, 3776, 272696336, 68174084, 3776, 272696336, 68174084, 3776, 272696336, 68174084, 3776, 272696336, 68174084, 3776, 272696336, 68174084, 8512, 73, 0, 8512, 73, 0, 8512, 73, 0, 14032, 0, 536870912, 16896, 272696336, 68174084, 16896, 272696336, 68174084, 16896, 272696336, 68174084, 16896, 272696336, 68174084, 16896, 272696336, 68174084, 16896, 272696336, 68174084, 16896, 272696336, 68174084, 16896, 272696336, 68174084, 16896, 272696336, 68174084, 16896, 272696336, 68174084, 17984, 2181570690, 8521760, 17984, 2181570690, 8521760, 17984, 2181570690, 8521760, 17984, 2181570690, 8521760, 17984, 2181570690, 8521760, 17984, 2181570690, 8521760, 17984, 2181570690, 8521760, 17984, 2181570690, 8521760, 17984, 2181570690, 8521760, 17984, 2181570690, 8521760, 18000, 2181570690, 8521760, 18000, 2181570690, 8521760, 18000, 2181570690, 8521760, 18000, 2181570690, 8521760, 18000, 2181570690, 8521760, 18000, 2181570690, 8521760, 18000, 
2181570690, 8521760, 18000, 2181570690, 8521760, 18000, 2181570690, 8521760, 18000, 2181570690, 8521760, 19652, 16, 0, 19656, 16, 0, 19660, 16, 0, 19668, 16, 0, 19672, 16, 0, 19676, 16, 0, 20224, 2181570690, 8521760, 20224, 2181570690, 8521760, 20224, 2181570690, 8521760, 20224, 2181570690, 8521760, 20224, 2181570690, 8521760, 20224, 2181570690, 8521760, 20224, 2181570690, 8521760, 20224, 2181570690, 8521760, 20224, 2181570690, 8521760, 20224, 2181570690, 8521760, 20240, 2181570690, 8521760, 20240, 2181570690, 8521760, 20240, 2181570690, 8521760, 20240, 2181570690, 8521760, 20240, 2181570690, 8521760, 20240, 2181570690, 8521760, 20240, 2181570690, 8521760, 20240, 2181570690, 8521760, 20240, 2181570690, 8521760, 20240, 2181570690, 8521760, 21328, 0, 1, 21344, 0, 1, 22612, 4, 1227096064, 22612, 4, 1227096064, 22612, 4, 1227096064, 22612, 4, 1227096064, 22612, 4, 1227096064, 22612, 4, 1227096064, 22628, 4, 1227096064, 22628, 4, 1227096064, 22628, 4, 1227096064, 22628, 4, 1227096064, 22628, 4, 1227096064, 22628, 4, 1227096064, 24212, 133152, 134217728, 24212, 133152, 134217728, 24212, 133152, 134217728, 24212, 133152, 134217728, 24228, 133152, 134217728, 24228, 133152, 134217728, 24228, 133152, 134217728, 24228, 133152, 134217728] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444573647836402_462_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444573647836402_462_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b3182e5c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444573647836402_462_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444573878993174_463_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444573878993174_463_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..48cbd7d4 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444573878993174_463_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,221 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 59))) { + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 1))) { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 45))) { + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 56))) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 63)) { + if 
((WaveGetLaneIndex() == 40)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 30))) { + if ((((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (221 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444574202002604_464_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444574202002604_464_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b3182e5c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444574202002604_464_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444709256605027_466_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444709256605027_466_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..72714a26 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444709256605027_466_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,313 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 57))) { + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 27)) { + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 41))) { + 
switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() == 63)) { + if ((WaveGetLaneIndex() == 55)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() == 49)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + case 1: { + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 7))) { + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 59)) { + result = 
(result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((269 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 7))) { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((299 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((324 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((331 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((338 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + case 2: { + if (true) { + 
result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (343 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3280, 0, 33554432, 3296, 0, 33554432, 10432, 73, 0, 10432, 73, 0, 10432, 73, 0, 16320, 8192, 0, 17232, 0, 2952790016, 17232, 0, 2952790016, 17232, 0, 2952790016, 17248, 0, 2952790016, 17248, 0, 2952790016, 17248, 0, 2952790016, 17264, 0, 2952790016, 17264, 0, 2952790016, 17264, 0, 2952790016, 21200, 65536, 0, 21216, 65536, 0, 21232, 65536, 0, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 
4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295, 21952, 4294967295, 4294967295] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444728354013189_467_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444728354013189_467_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9e6a7f65 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444728354013189_467_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,81 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 44)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 
1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444728610700071_468_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444728610700071_468_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..15a5c1c0 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444728610700071_468_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,271 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 58)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 55)) { + if ((WaveGetLaneIndex() < 22)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 19)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (counter0 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- 
+Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 657 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1920, 838860, 0, 1920, 838860, 0, 1920, 838860, 0, 1920, 838860, 0, 1920, 838860, 0, 1920, 838860, 0, 1920, 838860, 0, 1920, 838860, 0, 1920, 838860, 0, 1920, 838860, 0, 5440, 0, 1409286144, 5440, 0, 1409286144, 5440, 0, 1409286144, 8208, 4, 17472, 8208, 4, 17472, 8208, 4, 17472, 8208, 4, 17472, 8224, 4, 17472, 8224, 4, 17472, 8224, 4, 17472, 8224, 4, 17472, 8240, 4, 17472, 8240, 4, 17472, 8240, 4, 17472, 8240, 4, 17472, 9680, 16777472, 4096, 9680, 16777472, 4096, 9680, 16777472, 4096, 9696, 16777472, 4096, 9696, 16777472, 4096, 9696, 16777472, 4096, 9712, 16777472, 4096, 9712, 16777472, 4096, 9712, 16777472, 4096, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 
1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10256, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10272, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 
1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10288, 1431655765, 1431655765, 10688, 0, 1431655744, 10688, 0, 1431655744, 10688, 0, 1431655744, 10688, 0, 1431655744, 10688, 0, 1431655744, 10688, 0, 1431655744, 10688, 0, 1431655744, 10688, 0, 1431655744, 10688, 0, 1431655744, 10688, 0, 1431655744, 10688, 0, 1431655744, 10688, 0, 1431655744, 10688, 0, 1431655744, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 
14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 14528, 1048575, 4294965248, 13888, 0, 8] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444741710265229_469_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444741710265229_469_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5210cae0 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444741710265229_469_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,69 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 57))) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444741872602086_470_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444741872602086_470_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..19984f71 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444741872602086_470_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,207 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((29 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 62)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveSum(result)); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((106 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((115 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((119 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 18)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((126 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 972 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 585, 2449473536, 1856, 585, 2449473536, 1856, 585, 2449473536, 1856, 585, 2449473536, 1856, 585, 2449473536, 1856, 585, 2449473536, 1856, 585, 2449473536, 1860, 585, 2449473536, 1860, 585, 2449473536, 1860, 585, 2449473536, 1860, 585, 2449473536, 1860, 585, 2449473536, 1860, 585, 2449473536, 1860, 585, 2449473536, 1872, 585, 2449473536, 1872, 585, 2449473536, 1872, 585, 2449473536, 1872, 585, 2449473536, 1872, 585, 2449473536, 1872, 585, 2449473536, 1872, 585, 2449473536, 1876, 585, 2449473536, 1876, 585, 2449473536, 1876, 585, 2449473536, 1876, 585, 2449473536, 1876, 585, 2449473536, 1876, 585, 2449473536, 1876, 585, 2449473536, 1888, 585, 2449473536, 1888, 585, 2449473536, 1888, 585, 2449473536, 1888, 585, 2449473536, 1888, 585, 2449473536, 1888, 585, 2449473536, 1888, 585, 2449473536, 1892, 585, 2449473536, 1892, 585, 2449473536, 1892, 585, 2449473536, 1892, 585, 2449473536, 1892, 585, 2449473536, 1892, 585, 2449473536, 1892, 585, 2449473536, 2560, 73, 2415919104, 2560, 73, 2415919104, 2560, 73, 2415919104, 2560, 73, 2415919104, 2560, 73, 2415919104, 2564, 73, 2415919104, 2564, 73, 2415919104, 2564, 73, 2415919104, 2564, 73, 2415919104, 2564, 73, 2415919104, 2576, 73, 2415919104, 2576, 73, 2415919104, 2576, 73, 2415919104, 2576, 73, 2415919104, 2576, 73, 2415919104, 2580, 73, 2415919104, 2580, 73, 2415919104, 2580, 73, 2415919104, 2580, 73, 2415919104, 2580, 73, 2415919104, 2592, 73, 2415919104, 2592, 73, 2415919104, 2592, 73, 2415919104, 2592, 73, 2415919104, 2592, 73, 2415919104, 2596, 73, 2415919104, 2596, 73, 
2415919104, 2596, 73, 2415919104, 2596, 73, 2415919104, 2596, 73, 2415919104, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3584, 1363481681, 340870420, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 
4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 3904, 4294967295, 4294967295, 4544, 73, 0, 4544, 73, 0, 4544, 73, 0, 5712, 219, 3067832320, 5712, 219, 3067832320, 5712, 219, 3067832320, 5712, 219, 3067832320, 5712, 219, 3067832320, 5712, 219, 3067832320, 5712, 219, 3067832320, 5712, 219, 3067832320, 5712, 219, 3067832320, 5712, 219, 3067832320, 5712, 219, 3067832320, 5712, 219, 3067832320, 5712, 219, 3067832320, 5712, 219, 3067832320, 5712, 219, 3067832320, 5712, 219, 3067832320, 5712, 219, 3067832320, 5712, 219, 3067832320, 5712, 219, 3067832320, 5712, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 5728, 219, 3067832320, 6804, 81, 0, 6804, 81, 0, 6804, 81, 0, 6808, 81, 0, 6808, 81, 0, 6808, 81, 0, 6820, 81, 0, 6820, 81, 0, 6820, 81, 0, 6824, 81, 0, 6824, 81, 0, 6824, 81, 0, 8084, 112347, 0, 8084, 112347, 0, 8084, 112347, 0, 8084, 112347, 0, 8084, 112347, 0, 8084, 112347, 0, 8084, 112347, 0, 8084, 112347, 0, 8084, 112347, 0, 8084, 112347, 0, 8084, 112347, 0, 8084, 112347, 0, 8088, 112347, 0, 8088, 112347, 0, 8088, 112347, 0, 8088, 112347, 0, 
8088, 112347, 0, 8088, 112347, 0, 8088, 112347, 0, 8088, 112347, 0, 8088, 112347, 0, 8088, 112347, 0, 8088, 112347, 0, 8088, 112347, 0, 8100, 112347, 0, 8100, 112347, 0, 8100, 112347, 0, 8100, 112347, 0, 8100, 112347, 0, 8100, 112347, 0, 8100, 112347, 0, 8100, 112347, 0, 8100, 112347, 0, 8100, 112347, 0, 8100, 112347, 0, 8100, 112347, 0, 8104, 112347, 0, 8104, 112347, 0, 8104, 112347, 0, 8104, 112347, 0, 8104, 112347, 0, 8104, 112347, 0, 8104, 112347, 0, 8104, 112347, 0, 8104, 112347, 0, 8104, 112347, 0, 8104, 112347, 0, 8104, 112347, 0, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 
4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295, 8576, 4294967295, 4294967295] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444780223416399_471_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444780223416399_471_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ec24ee93 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444780223416399_471_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,168 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 45)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 24)) { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 40)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if (((WaveGetLaneIndex() < 12) || 
(WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5760, 4095, 4294443008, 5504, 2863308800, 174762, 5504, 2863308800, 174762, 5504, 2863308800, 174762, 5504, 2863308800, 174762, 5504, 2863308800, 174762, 5504, 
2863308800, 174762, 5504, 2863308800, 174762, 5504, 2863308800, 174762, 5504, 2863308800, 174762, 5504, 2863308800, 174762, 5504, 2863308800, 174762, 5504, 2863308800, 174762, 5504, 2863308800, 174762, 5504, 2863308800, 174762, 5504, 2863308800, 174762, 5504, 2863308800, 174762, 5504, 2863308800, 174762, 5504, 2863308800, 174762, 5504, 2863308800, 174762, 5248, 1431654400, 349525, 5248, 1431654400, 349525, 5248, 1431654400, 349525, 5248, 1431654400, 349525, 5248, 1431654400, 349525, 5248, 1431654400, 349525, 5248, 1431654400, 349525, 5248, 1431654400, 349525, 5248, 1431654400, 349525, 5248, 1431654400, 349525, 5248, 1431654400, 349525, 5248, 1431654400, 349525, 5248, 1431654400, 349525, 5248, 1431654400, 349525, 5248, 1431654400, 349525, 5248, 1431654400, 349525, 5248, 1431654400, 349525, 5248, 1431654400, 349525, 5248, 1431654400, 349525, 5248, 1431654400, 349525] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444780647802441_472_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444780647802441_472_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4300c361 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444780647802441_472_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,138 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 57))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 44))) { + result = (result 
+ WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 61)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1728, 0, 2684354560, 1728, 0, 2684354560, 2624, 2, 2852126720, 2624, 2, 2852126720, 2624, 2, 2852126720, 2624, 2, 2852126720, 2624, 2, 2852126720, 3328, 2, 2852126720, 3328, 2, 2852126720, 3328, 2, 2852126720, 3328, 2, 2852126720, 3328, 2, 2852126720, 5120, 680, 10485760, 5120, 680, 
10485760, 5120, 680, 10485760, 5120, 680, 10485760, 5120, 680, 10485760, 5120, 680, 10485760, 5136, 680, 10485760, 5136, 680, 10485760, 5136, 680, 10485760, 5136, 680, 10485760, 5136, 680, 10485760, 5136, 680, 10485760, 5152, 680, 10485760, 5152, 680, 10485760, 5152, 680, 10485760, 5152, 680, 10485760, 5152, 680, 10485760, 5152, 680, 10485760] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444781633935227_473_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444781633935227_473_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..02d7a3a7 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444781633935227_473_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,104 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: 
{ + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 165 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 
1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269, 1472, 3067833782, 1840700269] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444782085588411_474_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444782085588411_474_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3a76fc90 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444782085588411_474_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 264 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 
1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444899448824190_477_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444899448824190_477_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2399e743 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444899448824190_477_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,216 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 42)) { + if ((WaveGetLaneIndex() == 32)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 47))) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 54))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + case 1: { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 35))) { + if (((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 43))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 49)) || 
(WaveGetLaneIndex() == 7))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [13376, 0, 16777216, 15808, 1065472, 73728, 15808, 1065472, 73728, 15808, 1065472, 73728, 15808, 1065472, 73728, 15808, 1065472, 73728, 16896, 786432, 1, 16896, 786432, 1, 16896, 786432, 1] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444903427011538_479_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444903427011538_479_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b445f9e5 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444903427011538_479_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,105 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 14)) { + if ((WaveGetLaneIndex() >= 55)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 369 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1152, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 
2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1168, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 1184, 2863311530, 2863311530, 3776, 1024, 0, 3780, 1024, 0, 3784, 1024, 0, 3792, 1024, 0, 3796, 1024, 0, 3800, 1024, 0, 3808, 1024, 0, 3812, 1024, 0, 3816, 1024, 0, 4416, 63, 0, 4416, 63, 0, 4416, 63, 0, 4416, 63, 0, 4416, 63, 0, 4416, 63, 0, 4432, 63, 0, 4432, 63, 0, 4432, 63, 0, 4432, 63, 0, 4432, 63, 0, 4432, 63, 0, 4448, 63, 0, 4448, 63, 0, 4448, 63, 0, 4448, 63, 0, 4448, 63, 0, 4448, 63, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444912662746400_480_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444912662746400_480_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..51396154 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444912662746400_480_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,275 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 51))) { + if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 40))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 
5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + if ((i2 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 129 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + 
Data: [3776, 536870912, 512, 3776, 536870912, 512, 3792, 536870912, 512, 3792, 536870912, 512, 3808, 536870912, 512, 3808, 536870912, 512, 4608, 65, 0, 4608, 65, 0, 7168, 272696336, 68174084, 7168, 272696336, 68174084, 7168, 272696336, 68174084, 7168, 272696336, 68174084, 7168, 272696336, 68174084, 7168, 272696336, 68174084, 7168, 272696336, 68174084, 7168, 272696336, 68174084, 7168, 272696336, 68174084, 7168, 272696336, 68174084, 7488, 68174084, 1090785345, 7488, 68174084, 1090785345, 7488, 68174084, 1090785345, 7488, 68174084, 1090785345, 7488, 68174084, 1090785345, 7488, 68174084, 1090785345, 7488, 68174084, 1090785345, 7488, 68174084, 1090785345, 7488, 68174084, 1090785345, 7488, 68174084, 1090785345, 7488, 68174084, 1090785345, 8704, 85, 0, 8704, 85, 0, 8704, 85, 0, 8704, 85, 0, 12224, 2730, 2852126720, 12224, 2730, 2852126720, 12224, 2730, 2852126720, 12224, 2730, 2852126720, 12224, 2730, 2852126720, 12224, 2730, 2852126720, 12224, 2730, 2852126720, 12224, 2730, 2852126720, 12224, 2730, 2852126720, 12224, 2730, 2852126720] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444921591912064_482_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444921591912064_482_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6bbbe5c7 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444921591912064_482_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,244 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() >= 34)) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 33))) { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 45)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 54))) { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 56))) { + result = (result + 
WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 321 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5632, 0, 1431650304, 5632, 0, 1431650304, 5632, 0, 1431650304, 5632, 0, 1431650304, 5632, 0, 1431650304, 5632, 0, 1431650304, 5632, 0, 1431650304, 5632, 0, 1431650304, 5632, 0, 1431650304, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 
1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 10256, 16777216, 0, 10272, 16777216, 0, 12416, 4681, 2450072576, 12416, 4681, 2450072576, 12416, 4681, 2450072576, 12416, 4681, 2450072576, 12416, 4681, 2450072576, 12416, 4681, 2450072576, 12416, 4681, 2450072576, 12416, 4681, 2450072576, 12416, 4681, 2450072576, 12416, 4681, 2450072576, 12416, 4681, 2450072576, 12416, 4681, 2450072576, 13120, 299593, 2449473536, 13120, 299593, 2449473536, 13120, 299593, 2449473536, 13120, 299593, 2449473536, 13120, 299593, 2449473536, 13120, 299593, 2449473536, 13120, 299593, 2449473536, 13120, 299593, 2449473536, 13120, 299593, 2449473536, 13120, 299593, 2449473536, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 13696, 1363481681, 340870420, 14016, 613566756, 1227133513, 14016, 613566756, 1227133513, 14016, 613566756, 1227133513, 14016, 613566756, 1227133513, 14016, 613566756, 1227133513, 14016, 613566756, 1227133513, 14016, 613566756, 1227133513, 14016, 613566756, 1227133513, 14016, 613566756, 1227133513, 14016, 613566756, 1227133513, 14016, 613566756, 1227133513, 14016, 613566756, 1227133513, 14016, 613566756, 1227133513, 14016, 
613566756, 1227133513, 14016, 613566756, 1227133513, 14016, 613566756, 1227133513, 14016, 613566756, 1227133513, 14016, 613566756, 1227133513, 14016, 613566756, 1227133513, 14016, 613566756, 1227133513, 14016, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444923327231951_483_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444923327231951_483_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9a13e8ce --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444923327231951_483_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,98 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 11))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2064, 268435456, 1073741824, 2064, 268435456, 1073741824, 2080, 268435456, 1073741824, 2080, 268435456, 1073741824, 2096, 268435456, 1073741824, 2096, 268435456, 1073741824] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444925513001194_484_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444925513001194_484_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8a9e5757 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444925513001194_484_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 22)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 2624, 131071, 4290772992, 1472, 4063232, 0, 1472, 4063232, 0, 1472, 4063232, 0, 1472, 4063232, 0, 1472, 4063232, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444925923954546_485_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444925923954546_485_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ef59a44f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444925923954546_485_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1728, 1048577, 8194, 1728, 1048577, 8194, 1728, 1048577, 8194, 1728, 1048577, 8194, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 
1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765, 1472, 1430607188, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444984295489452_488_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444984295489452_488_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..02e5b40f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444984295489452_488_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,315 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 53))) { + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((194 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ } + if ((i3 == 2)) { + break; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 516 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1408, 136348168, 2181570690, 1408, 136348168, 2181570690, 1408, 136348168, 2181570690, 1408, 136348168, 2181570690, 1408, 136348168, 2181570690, 1408, 136348168, 2181570690, 1408, 136348168, 2181570690, 1408, 136348168, 2181570690, 1408, 136348168, 2181570690, 1408, 136348168, 2181570690, 1408, 136348168, 2181570690, 1424, 136348168, 2181570690, 1424, 136348168, 2181570690, 1424, 136348168, 2181570690, 1424, 136348168, 2181570690, 1424, 136348168, 2181570690, 1424, 136348168, 2181570690, 1424, 136348168, 2181570690, 1424, 136348168, 2181570690, 1424, 136348168, 2181570690, 1424, 136348168, 2181570690, 1424, 136348168, 2181570690, 1440, 136348168, 2181570690, 1440, 136348168, 2181570690, 1440, 136348168, 2181570690, 1440, 136348168, 2181570690, 1440, 136348168, 2181570690, 1440, 136348168, 2181570690, 1440, 136348168, 2181570690, 1440, 136348168, 2181570690, 1440, 136348168, 2181570690, 1440, 136348168, 2181570690, 1440, 136348168, 2181570690, 10432, 272696336, 68174084, 10432, 272696336, 68174084, 10432, 272696336, 68174084, 10432, 272696336, 68174084, 10432, 272696336, 68174084, 10432, 272696336, 68174084, 10432, 272696336, 68174084, 10432, 272696336, 68174084, 10432, 272696336, 68174084, 10432, 272696336, 68174084, 12928, 613566756, 1227133513, 12928, 613566756, 1227133513, 12928, 613566756, 
1227133513, 12928, 613566756, 1227133513, 12928, 613566756, 1227133513, 12928, 613566756, 1227133513, 12928, 613566756, 1227133513, 12928, 613566756, 1227133513, 12928, 613566756, 1227133513, 12928, 613566756, 1227133513, 12928, 613566756, 1227133513, 12928, 613566756, 1227133513, 12928, 613566756, 1227133513, 12928, 613566756, 1227133513, 12928, 613566756, 1227133513, 12928, 613566756, 1227133513, 12928, 613566756, 1227133513, 12928, 613566756, 1227133513, 12928, 613566756, 1227133513, 12928, 613566756, 1227133513, 12928, 613566756, 1227133513, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 13824, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 
2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 14400, 2863311530, 2863311530, 15040, 17, 0, 15040, 17, 0, 15616, 286331153, 286331153, 15616, 286331153, 286331153, 15616, 286331153, 286331153, 15616, 286331153, 286331153, 15616, 286331153, 286331153, 15616, 286331153, 286331153, 15616, 286331153, 286331153, 15616, 286331153, 286331153, 15616, 286331153, 286331153, 15616, 286331153, 286331153, 15616, 286331153, 286331153, 15616, 286331153, 286331153, 15616, 286331153, 286331153, 15616, 286331153, 286331153, 15616, 286331153, 286331153, 15616, 286331153, 286331153, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 16384, 838860, 0, 16384, 838860, 0, 16384, 838860, 0, 16384, 838860, 0, 16384, 838860, 0, 16384, 838860, 0, 16384, 838860, 0, 16384, 838860, 0, 16384, 838860, 0, 16384, 838860, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756444989597019189_489_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756444989597019189_489_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ca911b5e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756444989597019189_489_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,414 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 62))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 38)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 56)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 32)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 53)) { + if ((WaveGetLaneIndex() < 31)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 28)) || 
(WaveGetLaneIndex() == 58))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 52))) { + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((268 << 
6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((283 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 60)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((321 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() >= 48)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((336 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 34)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(((343 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 36)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((350 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (355 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 55))) { + if (((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (386 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (401 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (408 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (417 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (422 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 561 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 7936, 0, 67108864, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 9152, 73, 0, 9152, 73, 0, 9152, 73, 0, 9728, 1363481681, 340870420, 9728, 1363481681, 340870420, 9728, 1363481681, 340870420, 9728, 1363481681, 340870420, 9728, 1363481681, 340870420, 9728, 1363481681, 340870420, 9728, 1363481681, 340870420, 9728, 
1363481681, 340870420, 9728, 1363481681, 340870420, 9728, 1363481681, 340870420, 9728, 1363481681, 340870420, 9728, 1363481681, 340870420, 9728, 1363481681, 340870420, 9728, 1363481681, 340870420, 9728, 1363481681, 340870420, 9728, 1363481681, 340870420, 9728, 1363481681, 340870420, 9728, 1363481681, 340870420, 9728, 1363481681, 340870420, 9728, 1363481681, 340870420, 9728, 1363481681, 340870420, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 20560, 0, 536870912, 20576, 0, 536870912, 21520, 0, 572653568, 21520, 0, 572653568, 21520, 0, 572653568, 21520, 0, 572653568, 21524, 0, 572653568, 21524, 0, 572653568, 21524, 0, 572653568, 21524, 0, 572653568, 21528, 0, 572653568, 21528, 0, 572653568, 21528, 0, 572653568, 21528, 0, 572653568, 21536, 0, 572653568, 21536, 0, 572653568, 21536, 0, 572653568, 21536, 0, 572653568, 21540, 0, 572653568, 21540, 0, 572653568, 21540, 0, 572653568, 21540, 0, 572653568, 21544, 0, 572653568, 21544, 0, 572653568, 21544, 0, 572653568, 21544, 0, 572653568, 21968, 0, 572662304, 21968, 0, 572662304, 21968, 0, 572662304, 21968, 0, 572662304, 21968, 0, 572662304, 21968, 0, 572662304, 21968, 0, 572662304, 21972, 0, 572662304, 21972, 0, 572662304, 21972, 0, 572662304, 21972, 0, 572662304, 21972, 0, 572662304, 21972, 0, 572662304, 21972, 0, 572662304, 21976, 0, 572662304, 21976, 0, 572662304, 21976, 0, 572662304, 21976, 0, 572662304, 21976, 0, 572662304, 21976, 0, 572662304, 
21976, 0, 572662304, 21984, 0, 572662304, 21984, 0, 572662304, 21984, 0, 572662304, 21984, 0, 572662304, 21984, 0, 572662304, 21984, 0, 572662304, 21984, 0, 572662304, 21988, 0, 572662304, 21988, 0, 572662304, 21988, 0, 572662304, 21988, 0, 572662304, 21988, 0, 572662304, 21988, 0, 572662304, 21988, 0, 572662304, 21992, 0, 572662304, 21992, 0, 572662304, 21992, 0, 572662304, 21992, 0, 572662304, 21992, 0, 572662304, 21992, 0, 572662304, 21992, 0, 572662304, 22416, 0, 572662304, 22416, 0, 572662304, 22416, 0, 572662304, 22416, 0, 572662304, 22416, 0, 572662304, 22416, 0, 572662304, 22416, 0, 572662304, 22432, 0, 572662304, 22432, 0, 572662304, 22432, 0, 572662304, 22432, 0, 572662304, 22432, 0, 572662304, 22432, 0, 572662304, 22432, 0, 572662304, 22720, 1145324612, 1145324612, 22720, 1145324612, 1145324612, 22720, 1145324612, 1145324612, 22720, 1145324612, 1145324612, 22720, 1145324612, 1145324612, 22720, 1145324612, 1145324612, 22720, 1145324612, 1145324612, 22720, 1145324612, 1145324612, 22720, 1145324612, 1145324612, 22720, 1145324612, 1145324612, 22720, 1145324612, 1145324612, 22720, 1145324612, 1145324612, 22720, 1145324612, 1145324612, 22720, 1145324612, 1145324612, 22720, 1145324612, 1145324612, 22720, 1145324612, 1145324612, 26112, 0, 2152202240, 26112, 0, 2152202240, 26112, 0, 2152202240, 26688, 4195328, 67125252, 26688, 4195328, 67125252, 26688, 4195328, 67125252, 26688, 4195328, 67125252, 26688, 4195328, 67125252, 27008, 75515908, 1208254536, 27008, 75515908, 1208254536, 27008, 75515908, 1208254536, 27008, 75515908, 1208254536, 27008, 75515908, 1208254536, 27008, 75515908, 1208254536, 27008, 75515908, 1208254536, 27008, 75515908, 1208254536, 27008, 75515908, 1208254536, 27008, 75515908, 1208254536, 27008, 75515908, 1208254536] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756445607591627147_497_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756445607591627147_497_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e71a28da --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756445607591627147_497_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,247 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 32)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((30 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 15)) { + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 41)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 48))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 19))) { + if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 54))) { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if 
((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 62)) { + if ((WaveGetLaneIndex() == 62)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((215 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1080 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 
4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 896, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 912, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 928, 4294967295, 0, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 
2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1924, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1928, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 
2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1940, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1944, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 
2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1956, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 1960, 2863311530, 2863311530, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3776, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 
4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3792, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 3808, 0, 4294966784, 13312, 0, 1073741824, 13328, 0, 1073741824, 13344, 0, 1073741824] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756446176420747598_499_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756446176420747598_499_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a99c6de6 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756446176420747598_499_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,97 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- 
+Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756446176619032565_500_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756446176619032565_500_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f72259db --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756446176619032565_500_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,267 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 38)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 51))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((139 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2176, 0, 64, 2192, 0, 64, 3264, 85, 0, 3264, 85, 0, 3264, 85, 0, 3264, 85, 0, 3904, 8, 0, 5760, 545392672, 136348168, 5760, 545392672, 136348168, 5760, 545392672, 136348168, 5760, 545392672, 
136348168, 5760, 545392672, 136348168, 5760, 545392672, 136348168, 5760, 545392672, 136348168, 5760, 545392672, 136348168, 5760, 545392672, 136348168, 5760, 545392672, 136348168, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10240, 1145324612, 1145324612, 10688, 559240, 0, 10688, 559240, 0, 10688, 559240, 0, 10688, 559240, 0, 10688, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756446298251827015_502_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756446298251827015_502_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..28398b06 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756446298251827015_502_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,82 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 
1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756446463572262146_504_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756446463572262146_504_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..28a504f3 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756446463572262146_504_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,352 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 57))) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if 
((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 55)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 55))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 26)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 61)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + break; + } + case 3: { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 50))) { + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 27)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml 
+--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 8064, 0, 536870912, 11840, 131104, 0, 11840, 131104, 0, 13248, 1024, 0, 13264, 1024, 0, 15616, 0, 2048, 18368, 8947848, 0, 18368, 8947848, 0, 18368, 8947848, 0, 18368, 8947848, 0, 18368, 8947848, 0, 18368, 8947848, 0, 18384, 8947848, 0, 18384, 8947848, 0, 18384, 8947848, 0, 18384, 8947848, 0, 18384, 8947848, 0, 18384, 8947848, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756446512914304387_506_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756446512914304387_506_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a79d0c66 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756446512914304387_506_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,197 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 52)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
if ((i1 == 2)) { + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 54)) { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 58)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 99 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 2097152, 0, 1296, 2097152, 0, 6080, 272696336, 68174084, 6080, 272696336, 68174084, 6080, 272696336, 68174084, 6080, 272696336, 68174084, 6080, 272696336, 68174084, 6080, 272696336, 68174084, 6080, 272696336, 68174084, 6080, 272696336, 68174084, 6080, 272696336, 68174084, 6080, 272696336, 68174084, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513, 7296, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756446552729748524_508_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756446552729748524_508_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..49cc7ce0 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756446552729748524_508_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,122 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 62))) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 49))) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 
1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 32))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: 
UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 63, 3221225472, 1088, 63, 3221225472, 1088, 63, 3221225472, 1088, 63, 3221225472, 1088, 63, 3221225472, 1088, 63, 3221225472, 1088, 63, 3221225472, 1088, 63, 3221225472] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756446552920238320_509_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756446552920238320_509_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3e7fe8ab --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756446552920238320_509_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,214 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 55))) { + if (((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 37)) { + if ((WaveGetLaneIndex() == 44)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 52)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 55))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 26) || 
(WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3392, 0, 16, 4032, 17, 0, 4032, 17, 0, 4608, 286331153, 286331153, 4608, 286331153, 286331153, 4608, 286331153, 286331153, 4608, 286331153, 286331153, 4608, 286331153, 286331153, 4608, 286331153, 286331153, 4608, 286331153, 286331153, 4608, 286331153, 286331153, 4608, 286331153, 286331153, 4608, 286331153, 286331153, 4608, 286331153, 286331153, 4608, 286331153, 286331153, 4608, 286331153, 286331153, 4608, 286331153, 286331153, 4608, 286331153, 286331153, 4608, 286331153, 286331153, 5888, 4, 1140850688, 5888, 4, 1140850688, 5888, 4, 1140850688, 8320, 4, 1140850688, 8320, 4, 1140850688, 8320, 4, 1140850688, 10704, 0, 16384, 10720, 0, 16384, 11392, 67108864, 0, 11840, 838860, 0, 11840, 838860, 0, 11840, 838860, 0, 11840, 838860, 0, 11840, 838860, 0, 11840, 
838860, 0, 11840, 838860, 0, 11840, 838860, 0, 11840, 838860, 0, 11840, 838860, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756446564727715984_511_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756446564727715984_511_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c2efb424 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756446564727715984_511_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,308 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 35)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 41)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 63)) { + if ((WaveGetLaneIndex() >= 35)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 54))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 48))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 45)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 34)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 24)) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 62))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((WaveGetLaneIndex() >= 51)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 123 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5312, 5, 1073741824, 5312, 5, 1073741824, 5312, 5, 1073741824, 8384, 127, 4290772992, 8384, 127, 4290772992, 8384, 127, 4290772992, 8384, 127, 4290772992, 8384, 127, 4290772992, 8384, 127, 4290772992, 8384, 127, 4290772992, 8384, 127, 4290772992, 8384, 127, 4290772992, 8384, 127, 4290772992, 8384, 127, 4290772992, 8384, 127, 4290772992, 8384, 127, 4290772992, 8384, 127, 4290772992, 8384, 127, 4290772992, 8384, 127, 4290772992, 8384, 127, 4290772992, 11408, 544, 572522496, 
11408, 544, 572522496, 11408, 544, 572522496, 11408, 544, 572522496, 11408, 544, 572522496, 11424, 544, 572522496, 11424, 544, 572522496, 11424, 544, 572522496, 11424, 544, 572522496, 11424, 544, 572522496, 12624, 2097152, 0, 12640, 2097152, 0, 14592, 4, 1073741824, 14592, 4, 1073741824, 15488, 0, 71303168, 15488, 0, 71303168, 15936, 559240, 0, 15936, 559240, 0, 15936, 559240, 0, 15936, 559240, 0, 15936, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756446846692990836_513_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756446846692990836_513_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ed3cd32f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756446846692990836_513_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,182 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 16)) { + if ((WaveGetLaneIndex() == 36)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 48))) { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 32)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 43)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [10304, 0, 1, 9920, 264192, 134479936, 9920, 264192, 134479936, 9920, 264192, 134479936, 9920, 264192, 134479936, 9920, 264192, 134479936, 9664, 0, 2048, 9408, 536870912, 0, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556, 9024, 1431393621, 1431393556] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + 
Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756446847745261791_514_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756446847745261791_514_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d60493b5 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756446847745261791_514_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,125 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((13 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + 
if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 35)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 56))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((counter0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 159 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [848, 1048575, 0, 848, 1048575, 0, 848, 1048575, 0, 848, 1048575, 0, 848, 1048575, 0, 848, 1048575, 0, 848, 1048575, 0, 848, 1048575, 0, 848, 1048575, 0, 848, 1048575, 0, 848, 1048575, 0, 848, 1048575, 0, 848, 1048575, 0, 848, 1048575, 0, 848, 1048575, 0, 848, 1048575, 0, 848, 1048575, 0, 848, 1048575, 0, 848, 1048575, 0, 848, 1048575, 0, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 
3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3088, 1431655765, 1431655765, 3536, 0, 8] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756446900843066612_516_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756446900843066612_516_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a9c67267 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756446900843066612_516_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,242 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 42)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 54)) { + if ((WaveGetLaneIndex() >= 40)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 21)) { + if ((WaveGetLaneIndex() == 62)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 40)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((162 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 2)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 171 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 3776, 2147483648, 0, 3792, 2147483648, 0, 3808, 2147483648, 0, 5584, 545392672, 136348168, 5584, 545392672, 136348168, 5584, 545392672, 136348168, 5584, 545392672, 136348168, 5584, 545392672, 136348168, 5584, 545392672, 136348168, 5584, 545392672, 136348168, 5584, 545392672, 136348168, 5584, 545392672, 136348168, 5584, 545392672, 136348168, 5600, 545392672, 136348168, 5600, 545392672, 136348168, 5600, 545392672, 136348168, 5600, 545392672, 136348168, 5600, 545392672, 136348168, 5600, 545392672, 136348168, 5600, 545392672, 136348168, 5600, 545392672, 136348168, 5600, 545392672, 136348168, 5600, 545392672, 136348168, 6224, 0, 1224736768, 6224, 0, 1224736768, 6224, 0, 1224736768, 6240, 0, 1224736768, 6240, 0, 1224736768, 6240, 0, 1224736768, 6800, 68174084, 1090785345, 6800, 68174084, 1090785345, 6800, 68174084, 1090785345, 6800, 68174084, 1090785345, 6800, 68174084, 1090785345, 6800, 68174084, 1090785345, 6800, 68174084, 1090785345, 6800, 68174084, 1090785345, 6800, 68174084, 1090785345, 6800, 68174084, 1090785345, 6800, 68174084, 1090785345, 6816, 68174084, 1090785345, 6816, 68174084, 1090785345, 6816, 68174084, 1090785345, 6816, 68174084, 1090785345, 6816, 
68174084, 1090785345, 6816, 68174084, 1090785345, 6816, 68174084, 1090785345, 6816, 68174084, 1090785345, 6816, 68174084, 1090785345, 6816, 68174084, 1090785345, 6816, 68174084, 1090785345, 11072, 2097152, 0, 11088, 2097152, 0, 11104, 2097152, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756446907035934466_517_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756446907035934466_517_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6cb4a17b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756446907035934466_517_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,224 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 59))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 50))) { + if (((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 75 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 7296, 0, 1048576, 11968, 4195328, 67125252, 11968, 4195328, 67125252, 11968, 4195328, 67125252, 11968, 4195328, 67125252, 11968, 4195328, 67125252, 12288, 67125252, 1074004032, 12288, 67125252, 1074004032, 12288, 67125252, 1074004032, 12288, 67125252, 1074004032, 12288, 67125252, 1074004032, 12288, 67125252, 1074004032, 12736, 559240, 0, 12736, 559240, 0, 12736, 559240, 0, 12736, 559240, 0, 12736, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + 
Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756446908134944183_518_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756446908134944183_518_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7df12d30 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756446908134944183_518_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,405 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 62))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 44))) { + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result 
+ WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 9)) { + if ((WaveGetLaneIndex() >= 37)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if 
(((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 43))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 53))) { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; 
+ } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 43))) { + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((258 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 45)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((265 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + 
break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((289 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 45)) { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((299 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((312 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (319 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (340 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 50))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((365 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (374 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (379 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: 
UInt32 + Stride: 4 + Data: [2496, 0, 2147483648, 2512, 0, 2147483648, 2528, 0, 2147483648, 4800, 17, 0, 4800, 17, 0, 7552, 68, 0, 7552, 68, 0, 9280, 4, 0, 13504, 1024, 1140850688, 13504, 1024, 1140850688, 13504, 1024, 1140850688, 20416, 559240, 0, 20416, 559240, 0, 20416, 559240, 0, 20416, 559240, 0, 20416, 559240, 0, 21760, 136348168, 2181570690, 21760, 136348168, 2181570690, 21760, 136348168, 2181570690, 21760, 136348168, 2181570690, 21760, 136348168, 2181570690, 21760, 136348168, 2181570690, 21760, 136348168, 2181570690, 21760, 136348168, 2181570690, 21760, 136348168, 2181570690, 21760, 136348168, 2181570690, 21760, 136348168, 2181570690, 23376, 8, 2147483648, 23376, 8, 2147483648, 23392, 8, 2147483648, 23392, 8, 2147483648, 23408, 8, 2147483648, 23408, 8, 2147483648, 23936, 272696336, 68174084, 23936, 272696336, 68174084, 23936, 272696336, 68174084, 23936, 272696336, 68174084, 23936, 272696336, 68174084, 23936, 272696336, 68174084, 23936, 272696336, 68174084, 23936, 272696336, 68174084, 23936, 272696336, 68174084, 23936, 272696336, 68174084, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513, 24256, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756446923390827562_521_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756446923390827562_521_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..527b1a3a --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756446923390827562_521_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,160 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 52)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 45)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((87 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if 
(true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 243 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 2176, 1048575, 4286578688, 1920, 1431306240, 5592405, 1920, 1431306240, 5592405, 1920, 1431306240, 5592405, 1920, 1431306240, 5592405, 1920, 1431306240, 5592405, 1920, 1431306240, 5592405, 1920, 1431306240, 5592405, 1920, 1431306240, 5592405, 1920, 1431306240, 5592405, 1920, 1431306240, 5592405, 1920, 1431306240, 5592405, 1920, 1431306240, 5592405, 1920, 1431306240, 5592405, 1920, 1431306240, 5592405, 1920, 1431306240, 5592405, 1920, 1431306240, 5592405, 1920, 1431306240, 5592405, 1920, 1431306240, 5592405, 1664, 0, 8192, 3968, 0, 128, 3984, 0, 128, 6784, 272696336, 68174084, 6784, 272696336, 68174084, 6784, 272696336, 68174084, 6784, 272696336, 
68174084, 6784, 272696336, 68174084, 6784, 272696336, 68174084, 6784, 272696336, 68174084, 6784, 272696336, 68174084, 6784, 272696336, 68174084, 6784, 272696336, 68174084, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513, 7104, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756446951091374013_523_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756446951091374013_523_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8b7aa666 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756446951091374013_523_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,148 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-uint records: id word, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] is the atomic write cursor into _participant_bit, advanced by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 53))) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 24))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 29)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) |
(i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 
1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4352, 1048576, 0, 4368, 1048576, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756446951305565794_524_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756446951305565794_524_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a07de160 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756446951305565794_524_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,240 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-uint records: id word, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] is the atomic write cursor into _participant_bit, advanced by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if
((WaveGetLaneIndex() < 29)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 25)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((84 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((91 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 
== 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 468 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2560, 256, 8, 2560, 256, 8, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2304, 536870655, 0, 2048, 0, 1073741840, 2048, 0, 1073741840, 4480, 8, 0, 4496, 8, 0, 4512, 8, 0, 5380, 19173961, 0, 5380, 19173961, 
0, 5380, 19173961, 0, 5380, 19173961, 0, 5380, 19173961, 0, 5380, 19173961, 0, 5380, 19173961, 0, 5380, 19173961, 0, 5380, 19173961, 0, 5396, 19173961, 0, 5396, 19173961, 0, 5396, 19173961, 0, 5396, 19173961, 0, 5396, 19173961, 0, 5396, 19173961, 0, 5396, 19173961, 0, 5396, 19173961, 0, 5396, 19173961, 0, 5412, 19173961, 0, 5412, 19173961, 0, 5412, 19173961, 0, 5412, 19173961, 0, 5412, 19173961, 0, 5412, 19173961, 0, 5412, 19173961, 0, 5412, 19173961, 0, 5412, 19173961, 0, 5828, 9, 0, 5828, 9, 0, 5844, 9, 0, 5844, 9, 0, 5860, 9, 0, 5860, 9, 0, 6976, 272696336, 68174084, 6976, 272696336, 68174084, 6976, 272696336, 68174084, 6976, 272696336, 68174084, 6976, 272696336, 68174084, 6976, 272696336, 68174084, 6976, 272696336, 68174084, 6976, 272696336, 68174084, 6976, 272696336, 68174084, 6976, 272696336, 68174084, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 
3067833782, 1840700269, 7296, 3067833782, 1840700269, 7296, 3067833782, 1840700269, 7936, 17, 0, 7936, 17, 0, 9920, 8194, 537001984, 9920, 8194, 537001984, 9920, 8194, 537001984, 9920, 8194, 537001984, 9936, 8194, 537001984, 9936, 8194, 537001984, 9936, 8194, 537001984, 9936, 8194, 537001984, 10432, 537002016, 2097664, 10432, 537002016, 2097664, 10432, 537002016, 2097664, 10432, 537002016, 2097664, 10432, 537002016, 2097664, 10752, 1145324612, 1145324612, 10752, 1145324612, 1145324612, 10752, 1145324612, 1145324612, 10752, 1145324612, 1145324612, 10752, 1145324612, 1145324612, 10752, 1145324612, 1145324612, 10752, 1145324612, 1145324612, 10752, 1145324612, 1145324612, 10752, 1145324612, 1145324612, 10752, 1145324612, 1145324612, 10752, 1145324612, 1145324612, 10752, 1145324612, 1145324612, 10752, 1145324612, 1145324612, 10752, 1145324612, 1145324612, 10752, 1145324612, 1145324612, 10752, 1145324612, 1145324612, 11200, 559240, 0, 11200, 559240, 0, 11200, 559240, 0, 11200, 559240, 0, 11200, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756446984692830987_526_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756446984692830987_526_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..508ee815 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756446984692830987_526_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,175 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-uint records: id word, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] is the atomic write cursor into _participant_bit, advanced by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 22))) { + if ((((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if
(true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 22))) { + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3392, 4194304, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756446984854848746_527_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756446984854848746_527_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d244cbb6 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756446984854848746_527_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,321 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* 3-uint records: id word, ballot.x, ballot.y */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] is the atomic write cursor into _participant_bit, advanced by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 44)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint
i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((63 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 53))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((83 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((94 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 55))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 43)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 53))) { + if (((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() >= 34)) { + if ((WaveGetLaneIndex() >= 40)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 
0: { + if ((WaveGetLaneIndex() >= 43)) { + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 52))) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 43)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (317 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (326 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + 
Size: 732 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 2147483656, 65540, 1856, 2147483656, 65540, 1856, 2147483656, 65540, 1856, 2147483656, 65540, 1472, 0, 4096, 2496, 73, 0, 2496, 73, 0, 2496, 73, 0, 4048, 2181570690, 545392672, 4048, 2181570690, 545392672, 4048, 2181570690, 545392672, 4048, 2181570690, 545392672, 4048, 2181570690, 545392672, 4048, 2181570690, 545392672, 4048, 2181570690, 545392672, 4048, 2181570690, 545392672, 4048, 2181570690, 545392672, 4048, 2181570690, 545392672, 4048, 2181570690, 545392672, 4052, 2181570690, 545392672, 4052, 2181570690, 545392672, 4052, 2181570690, 545392672, 4052, 2181570690, 545392672, 4052, 2181570690, 545392672, 4052, 2181570690, 545392672, 4052, 2181570690, 545392672, 4052, 2181570690, 545392672, 4052, 2181570690, 545392672, 4052, 2181570690, 545392672, 4052, 2181570690, 545392672, 4064, 2181570690, 545392672, 4064, 2181570690, 545392672, 4064, 2181570690, 545392672, 4064, 2181570690, 545392672, 4064, 2181570690, 545392672, 4064, 2181570690, 545392672, 4064, 2181570690, 545392672, 4064, 2181570690, 545392672, 4064, 2181570690, 545392672, 4064, 2181570690, 545392672, 4064, 2181570690, 545392672, 4068, 2181570690, 545392672, 4068, 2181570690, 545392672, 4068, 2181570690, 545392672, 4068, 2181570690, 545392672, 4068, 2181570690, 545392672, 4068, 2181570690, 545392672, 4068, 2181570690, 545392672, 4068, 2181570690, 545392672, 4068, 2181570690, 545392672, 4068, 2181570690, 545392672, 4068, 2181570690, 545392672, 4080, 2181570690, 545392672, 4080, 2181570690, 545392672, 4080, 2181570690, 545392672, 4080, 2181570690, 545392672, 4080, 2181570690, 545392672, 4080, 2181570690, 545392672, 4080, 2181570690, 545392672, 4080, 2181570690, 545392672, 4080, 2181570690, 545392672, 4080, 2181570690, 545392672, 4080, 2181570690, 545392672, 4084, 2181570690, 545392672, 4084, 2181570690, 545392672, 4084, 2181570690, 545392672, 4084, 2181570690, 545392672, 4084, 2181570690, 545392672, 4084, 2181570690, 
545392672, 4084, 2181570690, 545392672, 4084, 2181570690, 545392672, 4084, 2181570690, 545392672, 4084, 2181570690, 545392672, 4084, 2181570690, 545392672, 5328, 146, 612368384, 5328, 146, 612368384, 5328, 146, 612368384, 5328, 146, 612368384, 5328, 146, 612368384, 5328, 146, 612368384, 5332, 146, 612368384, 5332, 146, 612368384, 5332, 146, 612368384, 5332, 146, 612368384, 5332, 146, 612368384, 5332, 146, 612368384, 5344, 146, 612368384, 5344, 146, 612368384, 5344, 146, 612368384, 5344, 146, 612368384, 5344, 146, 612368384, 5344, 146, 612368384, 5348, 146, 612368384, 5348, 146, 612368384, 5348, 146, 612368384, 5348, 146, 612368384, 5348, 146, 612368384, 5348, 146, 612368384, 5360, 146, 612368384, 5360, 146, 612368384, 5360, 146, 612368384, 5360, 146, 612368384, 5360, 146, 612368384, 5360, 146, 612368384, 5364, 146, 612368384, 5364, 146, 612368384, 5364, 146, 612368384, 5364, 146, 612368384, 5364, 146, 612368384, 5364, 146, 612368384, 6032, 146, 612368384, 6032, 146, 612368384, 6032, 146, 612368384, 6032, 146, 612368384, 6032, 146, 612368384, 6032, 146, 612368384, 6036, 146, 612368384, 6036, 146, 612368384, 6036, 146, 612368384, 6036, 146, 612368384, 6036, 146, 612368384, 6036, 146, 612368384, 6048, 146, 612368384, 6048, 146, 612368384, 6048, 146, 612368384, 6048, 146, 612368384, 6048, 146, 612368384, 6048, 146, 612368384, 6052, 146, 612368384, 6052, 146, 612368384, 6052, 146, 612368384, 6052, 146, 612368384, 6052, 146, 612368384, 6052, 146, 612368384, 6064, 146, 612368384, 6064, 146, 612368384, 6064, 146, 612368384, 6064, 146, 612368384, 6064, 146, 612368384, 6064, 146, 612368384, 6068, 146, 612368384, 6068, 146, 612368384, 6068, 146, 612368384, 6068, 146, 612368384, 6068, 146, 612368384, 6068, 146, 612368384, 6608, 2181570690, 545392672, 6608, 2181570690, 545392672, 6608, 2181570690, 545392672, 6608, 2181570690, 545392672, 6608, 2181570690, 545392672, 6608, 2181570690, 545392672, 6608, 2181570690, 545392672, 6608, 2181570690, 545392672, 6608, 2181570690, 
545392672, 6608, 2181570690, 545392672, 6608, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6612, 2181570690, 545392672, 6624, 2181570690, 545392672, 6624, 2181570690, 545392672, 6624, 2181570690, 545392672, 6624, 2181570690, 545392672, 6624, 2181570690, 545392672, 6624, 2181570690, 545392672, 6624, 2181570690, 545392672, 6624, 2181570690, 545392672, 6624, 2181570690, 545392672, 6624, 2181570690, 545392672, 6624, 2181570690, 545392672, 6628, 2181570690, 545392672, 6628, 2181570690, 545392672, 6628, 2181570690, 545392672, 6628, 2181570690, 545392672, 6628, 2181570690, 545392672, 6628, 2181570690, 545392672, 6628, 2181570690, 545392672, 6628, 2181570690, 545392672, 6628, 2181570690, 545392672, 6628, 2181570690, 545392672, 6628, 2181570690, 545392672, 6640, 2181570690, 545392672, 6640, 2181570690, 545392672, 6640, 2181570690, 545392672, 6640, 2181570690, 545392672, 6640, 2181570690, 545392672, 6640, 2181570690, 545392672, 6640, 2181570690, 545392672, 6640, 2181570690, 545392672, 6640, 2181570690, 545392672, 6640, 2181570690, 545392672, 6640, 2181570690, 545392672, 6644, 2181570690, 545392672, 6644, 2181570690, 545392672, 6644, 2181570690, 545392672, 6644, 2181570690, 545392672, 6644, 2181570690, 545392672, 6644, 2181570690, 545392672, 6644, 2181570690, 545392672, 6644, 2181570690, 545392672, 6644, 2181570690, 545392672, 6644, 2181570690, 545392672, 6644, 2181570690, 545392672, 17984, 0, 1431654400, 17984, 0, 1431654400, 17984, 0, 1431654400, 17984, 0, 1431654400, 17984, 0, 1431654400, 17984, 0, 1431654400, 17984, 0, 1431654400, 17984, 0, 1431654400, 17984, 0, 1431654400, 17984, 0, 1431654400, 19136, 0, 1431306240, 19136, 0, 1431306240, 19136, 0, 1431306240, 19136, 0, 1431306240, 19136, 0, 1431306240, 
19136, 0, 1431306240, 19840, 0, 1431306240, 19840, 0, 1431306240, 19840, 0, 1431306240, 19840, 0, 1431306240, 19840, 0, 1431306240, 19840, 0, 1431306240, 20288, 0, 1431654400, 20288, 0, 1431654400, 20288, 0, 1431654400, 20288, 0, 1431654400, 20288, 0, 1431654400, 20288, 0, 1431654400, 20288, 0, 1431654400, 20288, 0, 1431654400, 20288, 0, 1431654400, 20288, 0, 1431654400] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447002136538921_528_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447002136538921_528_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d74c5d58 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447002136538921_528_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,560 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 53))) { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 51))) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result 
= (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 8))) { + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 23)) { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((244 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 59))) { + if ((((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 38)) || 
(WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 24)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 42)) { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (294 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (303 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveSum(7)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((345 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((358 << 6) | (counter5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((365 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (388 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((414 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i8 = 0; (i8 < 3); i8 = (i8 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((433 << 6) | (counter7 << 4)) | (i8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((444 << 6) | (counter7 << 4)) | (i8 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((453 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 61))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (471 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (485 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (496 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter9 = 0; + while ((counter9 < 2)) { + counter9 = (counter9 + 1); + if ((WaveGetLaneIndex() == 22)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((513 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((524 << 6) | (counter9 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 51)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (534 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (545 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (555 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (564 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (569 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (576 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (580 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (589 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 729 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [22100, 279620, 1145044992, 22100, 279620, 1145044992, 22100, 279620, 1145044992, 22100, 279620, 1145044992, 22100, 279620, 1145044992, 22100, 279620, 1145044992, 22100, 279620, 1145044992, 22100, 279620, 1145044992, 22104, 279620, 1145044992, 22104, 279620, 1145044992, 22104, 279620, 1145044992, 22104, 279620, 1145044992, 22104, 279620, 1145044992, 22104, 279620, 1145044992, 22104, 279620, 1145044992, 22104, 279620, 1145044992, 22108, 279620, 1145044992, 22108, 279620, 1145044992, 22108, 279620, 1145044992, 22108, 279620, 1145044992, 22108, 279620, 1145044992, 22108, 279620, 1145044992, 22108, 279620, 1145044992, 22108, 279620, 1145044992, 22116, 279620, 1145044992, 22116, 279620, 1145044992, 22116, 279620, 1145044992, 22116, 279620, 1145044992, 22116, 279620, 1145044992, 22116, 279620, 1145044992, 22116, 279620, 1145044992, 22116, 279620, 1145044992, 22120, 279620, 1145044992, 22120, 279620, 1145044992, 22120, 279620, 1145044992, 22120, 279620, 1145044992, 22120, 279620, 1145044992, 22120, 279620, 1145044992, 22120, 279620, 1145044992, 22120, 279620, 1145044992, 22124, 279620, 1145044992, 22124, 279620, 1145044992, 22124, 279620, 1145044992, 22124, 279620, 1145044992, 22124, 279620, 1145044992, 22124, 279620, 1145044992, 22124, 279620, 1145044992, 22124, 279620, 1145044992, 22932, 279620, 1145307136, 22932, 279620, 1145307136, 22932, 279620, 1145307136, 22932, 279620, 1145307136, 22932, 279620, 1145307136, 22932, 279620, 1145307136, 22932, 279620, 1145307136, 22932, 279620, 1145307136, 22932, 279620, 1145307136, 22936, 279620, 1145307136, 22936, 279620, 1145307136, 22936, 
279620, 1145307136, 22936, 279620, 1145307136, 22936, 279620, 1145307136, 22936, 279620, 1145307136, 22936, 279620, 1145307136, 22936, 279620, 1145307136, 22936, 279620, 1145307136, 22940, 279620, 1145307136, 22940, 279620, 1145307136, 22940, 279620, 1145307136, 22940, 279620, 1145307136, 22940, 279620, 1145307136, 22940, 279620, 1145307136, 22940, 279620, 1145307136, 22940, 279620, 1145307136, 22940, 279620, 1145307136, 22948, 279620, 1145307136, 22948, 279620, 1145307136, 22948, 279620, 1145307136, 22948, 279620, 1145307136, 22948, 279620, 1145307136, 22948, 279620, 1145307136, 22948, 279620, 1145307136, 22948, 279620, 1145307136, 22948, 279620, 1145307136, 22952, 279620, 1145307136, 22952, 279620, 1145307136, 22952, 279620, 1145307136, 22952, 279620, 1145307136, 22952, 279620, 1145307136, 22952, 279620, 1145307136, 22952, 279620, 1145307136, 22952, 279620, 1145307136, 22952, 279620, 1145307136, 22956, 279620, 1145307136, 22956, 279620, 1145307136, 22956, 279620, 1145307136, 22956, 279620, 1145307136, 22956, 279620, 1145307136, 22956, 279620, 1145307136, 22956, 279620, 1145307136, 22956, 279620, 1145307136, 22956, 279620, 1145307136, 25280, 559240, 0, 25280, 559240, 0, 25280, 559240, 0, 25280, 559240, 0, 25280, 559240, 0, 26512, 286331153, 286331153, 26512, 286331153, 286331153, 26512, 286331153, 286331153, 26512, 286331153, 286331153, 26512, 286331153, 286331153, 26512, 286331153, 286331153, 26512, 286331153, 286331153, 26512, 286331153, 286331153, 26512, 286331153, 286331153, 26512, 286331153, 286331153, 26512, 286331153, 286331153, 26512, 286331153, 286331153, 26512, 286331153, 286331153, 26512, 286331153, 286331153, 26512, 286331153, 286331153, 26512, 286331153, 286331153, 26528, 286331153, 286331153, 26528, 286331153, 286331153, 26528, 286331153, 286331153, 26528, 286331153, 286331153, 26528, 286331153, 286331153, 26528, 286331153, 286331153, 26528, 286331153, 286331153, 26528, 286331153, 286331153, 26528, 286331153, 286331153, 26528, 286331153, 286331153, 
26528, 286331153, 286331153, 26528, 286331153, 286331153, 26528, 286331153, 286331153, 26528, 286331153, 286331153, 26528, 286331153, 286331153, 26528, 286331153, 286331153, 27728, 1, 268435456, 27728, 1, 268435456, 27732, 1, 268435456, 27732, 1, 268435456, 27736, 1, 268435456, 27736, 1, 268435456, 27744, 1, 268435456, 27744, 1, 268435456, 27748, 1, 268435456, 27748, 1, 268435456, 27752, 1, 268435456, 27752, 1, 268435456, 28432, 17, 286261248, 28432, 17, 286261248, 28432, 17, 286261248, 28432, 17, 286261248, 28432, 17, 286261248, 28436, 17, 286261248, 28436, 17, 286261248, 28436, 17, 286261248, 28436, 17, 286261248, 28436, 17, 286261248, 28440, 17, 286261248, 28440, 17, 286261248, 28440, 17, 286261248, 28440, 17, 286261248, 28440, 17, 286261248, 28448, 17, 286261248, 28448, 17, 286261248, 28448, 17, 286261248, 28448, 17, 286261248, 28448, 17, 286261248, 28452, 17, 286261248, 28452, 17, 286261248, 28452, 17, 286261248, 28452, 17, 286261248, 28452, 17, 286261248, 28456, 17, 286261248, 28456, 17, 286261248, 28456, 17, 286261248, 28456, 17, 286261248, 28456, 17, 286261248, 30144, 51, 536870912, 30144, 51, 536870912, 30144, 51, 536870912, 30144, 51, 536870912, 30144, 51, 536870912, 31744, 51, 536870912, 31744, 51, 536870912, 31744, 51, 536870912, 31744, 51, 536870912, 31744, 51, 536870912, 33552, 1145324612, 1145324612, 33552, 1145324612, 1145324612, 33552, 1145324612, 1145324612, 33552, 1145324612, 1145324612, 33552, 1145324612, 1145324612, 33552, 1145324612, 1145324612, 33552, 1145324612, 1145324612, 33552, 1145324612, 1145324612, 33552, 1145324612, 1145324612, 33552, 1145324612, 1145324612, 33552, 1145324612, 1145324612, 33552, 1145324612, 1145324612, 33552, 1145324612, 1145324612, 33552, 1145324612, 1145324612, 33552, 1145324612, 1145324612, 33552, 1145324612, 1145324612, 33568, 1145324612, 1145324612, 33568, 1145324612, 1145324612, 33568, 1145324612, 1145324612, 33568, 1145324612, 1145324612, 33568, 1145324612, 1145324612, 33568, 1145324612, 1145324612, 33568, 
1145324612, 1145324612, 33568, 1145324612, 1145324612, 33568, 1145324612, 1145324612, 33568, 1145324612, 1145324612, 33568, 1145324612, 1145324612, 33568, 1145324612, 1145324612, 33568, 1145324612, 1145324612, 33568, 1145324612, 1145324612, 33568, 1145324612, 1145324612, 33568, 1145324612, 1145324612, 36864, 559240, 0, 36864, 559240, 0, 36864, 559240, 0, 36864, 559240, 0, 36864, 559240, 0, 37696, 2290649224, 2290124936, 37696, 2290649224, 2290124936, 37696, 2290649224, 2290124936, 37696, 2290649224, 2290124936, 37696, 2290649224, 2290124936, 37696, 2290649224, 2290124936, 37696, 2290649224, 2290124936, 37696, 2290649224, 2290124936, 37696, 2290649224, 2290124936, 37696, 2290649224, 2290124936, 37696, 2290649224, 2290124936, 37696, 2290649224, 2290124936, 37696, 2290649224, 2290124936, 37696, 2290649224, 2290124936, 37696, 2290649224, 2290124936] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447121801866959_529_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447121801866959_529_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ccd49013 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447121801866959_529_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,184 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 47)) { + if ((WaveGetLaneIndex() < 21)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 62))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 50))) { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 45))) { 
+ result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 201 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3968, 0, 1073774592, 3968, 0, 1073774592, 5440, 0, 1073741824, 6592, 63, 4278190080, 6592, 63, 4278190080, 6592, 63, 4278190080, 6592, 63, 4278190080, 6592, 63, 4278190080, 6592, 63, 4278190080, 6592, 63, 4278190080, 6592, 63, 4278190080, 6592, 63, 4278190080, 6592, 63, 4278190080, 6592, 63, 4278190080, 6592, 63, 4278190080, 6592, 63, 4278190080, 6592, 63, 4278190080, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 
4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 10048, 4095, 4294934528, 9664, 0, 24576, 9664, 0, 24576, 9408, 524288, 512, 9408, 524288, 512, 9024, 1431654400, 5461, 9024, 1431654400, 5461, 9024, 1431654400, 5461, 9024, 1431654400, 5461, 9024, 1431654400, 5461, 9024, 1431654400, 5461, 9024, 1431654400, 5461, 9024, 1431654400, 5461, 9024, 1431654400, 5461, 9024, 1431654400, 5461, 9024, 1431654400, 5461, 9024, 1431654400, 5461, 9024, 1431654400, 5461, 9024, 1431654400, 5461, 9024, 1431654400, 5461, 9024, 1431654400, 5461, 9024, 1431654400, 5461] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447122903263815_530_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447122903263815_530_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9a664967 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447122903263815_530_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 32)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 147 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: 
[1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1280, 4294967295, 0, 1024, 0, 4294934528, 1024, 0, 4294934528, 1024, 0, 4294934528, 1024, 0, 4294934528, 1024, 0, 4294934528, 1024, 0, 4294934528, 1024, 0, 4294934528, 1024, 0, 4294934528, 1024, 0, 4294934528, 1024, 0, 4294934528, 1024, 0, 4294934528, 1024, 0, 4294934528, 1024, 0, 4294934528, 1024, 0, 4294934528, 1024, 0, 4294934528, 1024, 0, 4294934528, 1024, 0, 4294934528] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447123199040625_531_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447123199040625_531_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d7ee8dbf --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447123199040625_531_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,151 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 35)) { + if ((WaveGetLaneIndex() == 41)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 19)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if ((WaveGetLaneIndex() >= 63)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 44)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 39)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 189 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 5440, 1431639381, 1431655765, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 
2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530, 6144, 2863311530, 2863311530] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447124014008500_532_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447124014008500_532_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b1a3ec90 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447124014008500_532_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,185 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((93 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 44)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i1 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 459 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 
2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 976, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 992, 2863311530, 2863311530, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 
1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1680, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 1696, 1431655765, 1431655765, 2496, 73, 0, 2496, 73, 0, 2496, 73, 0, 3712, 272696336, 68174084, 3712, 272696336, 68174084, 3712, 272696336, 68174084, 3712, 272696336, 68174084, 3712, 272696336, 68174084, 3712, 272696336, 68174084, 3712, 272696336, 68174084, 3712, 272696336, 68174084, 3712, 272696336, 68174084, 3712, 272696336, 68174084, 5952, 6, 0, 5952, 6, 0, 5956, 6, 0, 5956, 6, 0, 5960, 6, 0, 5960, 6, 0, 5968, 6, 0, 5968, 6, 0, 5972, 6, 0, 5972, 6, 0, 5976, 6, 0, 5976, 6, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer 
+ DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447144002440017_534_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447144002440017_534_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..44e599dc --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447144002440017_534_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,322 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 21)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((89 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 49)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 50)) { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while 
((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((190 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 48))) { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i5 
= 0; (i5 < 3); i5 = (i5 + 1)) { + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((265 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 2)) { + break; + } + } + break; + } + case 2: { + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if ((WaveGetLaneIndex() >= 37)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((303 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 408 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1792, 272696336, 68174084, 1792, 272696336, 68174084, 1792, 272696336, 68174084, 1792, 272696336, 68174084, 1792, 272696336, 68174084, 1792, 272696336, 68174084, 1792, 272696336, 68174084, 1792, 272696336, 68174084, 1792, 
272696336, 68174084, 1792, 272696336, 68174084, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 6656, 613566756, 1227133513, 8848, 0, 286261248, 8848, 0, 286261248, 8848, 0, 286261248, 8864, 0, 286261248, 8864, 0, 286261248, 8864, 0, 286261248, 10496, 51, 0, 10496, 51, 0, 10496, 51, 0, 10496, 51, 0, 10512, 51, 0, 10512, 51, 0, 10512, 51, 0, 10512, 51, 0, 12164, 0, 196609, 12164, 0, 196609, 12164, 0, 196609, 12168, 0, 196609, 12168, 0, 196609, 12168, 0, 196609, 12172, 0, 196609, 12172, 0, 196609, 12172, 0, 196609, 12180, 0, 196609, 12180, 0, 196609, 12180, 0, 196609, 12184, 0, 196609, 12184, 0, 196609, 12184, 0, 196609, 12188, 0, 196609, 12188, 0, 196609, 12188, 0, 196609, 12864, 1, 858783744, 12864, 1, 858783744, 12864, 1, 858783744, 12864, 1, 858783744, 12864, 1, 858783744, 12864, 1, 858783744, 12864, 1, 858783744, 12880, 1, 858783744, 12880, 1, 858783744, 12880, 1, 858783744, 12880, 1, 858783744, 12880, 1, 858783744, 12880, 1, 858783744, 12880, 1, 858783744, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 
2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 13184, 2004318071, 2004318071, 14528, 8, 2147483648, 14528, 8, 2147483648, 19408, 0, 134250496, 19408, 0, 134250496, 19424, 0, 134250496, 19424, 0, 134250496, 19440, 0, 134250496, 19440, 0, 134250496] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447239479107317_535_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447239479107317_535_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..df667763 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447239479107317_535_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,206 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter1 
<< 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 49)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 50))) { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 18)) { 
+ if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6480, 0, 67108864, 6496, 0, 67108864] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447240402475402_536_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447240402475402_536_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f5ebe2a3 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447240402475402_536_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,361 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((67 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 63))) { + switch ((WaveGetLaneIndex() % 4)) { + case 
0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if 
(((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 50))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 25))) { + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (297 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (307 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (323 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (334 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (339 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((357 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((368 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (372 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (395 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 603 + - Name: expected_bit_patterns + Format: UInt32 + 
Stride: 4 + Data: [2256, 2147483648, 0, 2272, 2147483648, 0, 2288, 2147483648, 0, 3600, 10, 2863300608, 3600, 10, 2863300608, 3600, 10, 2863300608, 3600, 10, 2863300608, 3600, 10, 2863300608, 3600, 10, 2863300608, 3600, 10, 2863300608, 3600, 10, 2863300608, 3600, 10, 2863300608, 3600, 10, 2863300608, 3600, 10, 2863300608, 3604, 10, 2863300608, 3604, 10, 2863300608, 3604, 10, 2863300608, 3604, 10, 2863300608, 3604, 10, 2863300608, 3604, 10, 2863300608, 3604, 10, 2863300608, 3604, 10, 2863300608, 3604, 10, 2863300608, 3604, 10, 2863300608, 3604, 10, 2863300608, 3608, 10, 2863300608, 3608, 10, 2863300608, 3608, 10, 2863300608, 3608, 10, 2863300608, 3608, 10, 2863300608, 3608, 10, 2863300608, 3608, 10, 2863300608, 3608, 10, 2863300608, 3608, 10, 2863300608, 3608, 10, 2863300608, 3608, 10, 2863300608, 3616, 10, 2863300608, 3616, 10, 2863300608, 3616, 10, 2863300608, 3616, 10, 2863300608, 3616, 10, 2863300608, 3616, 10, 2863300608, 3616, 10, 2863300608, 3616, 10, 2863300608, 3616, 10, 2863300608, 3616, 10, 2863300608, 3616, 10, 2863300608, 3620, 10, 2863300608, 3620, 10, 2863300608, 3620, 10, 2863300608, 3620, 10, 2863300608, 3620, 10, 2863300608, 3620, 10, 2863300608, 3620, 10, 2863300608, 3620, 10, 2863300608, 3620, 10, 2863300608, 3620, 10, 2863300608, 3620, 10, 2863300608, 3624, 10, 2863300608, 3624, 10, 2863300608, 3624, 10, 2863300608, 3624, 10, 2863300608, 3624, 10, 2863300608, 3624, 10, 2863300608, 3624, 10, 2863300608, 3624, 10, 2863300608, 3624, 10, 2863300608, 3624, 10, 2863300608, 3624, 10, 2863300608, 3632, 10, 2863300608, 3632, 10, 2863300608, 3632, 10, 2863300608, 3632, 10, 2863300608, 3632, 10, 2863300608, 3632, 10, 2863300608, 3632, 10, 2863300608, 3632, 10, 2863300608, 3632, 10, 2863300608, 3632, 10, 2863300608, 3632, 10, 2863300608, 3636, 10, 2863300608, 3636, 10, 2863300608, 3636, 10, 2863300608, 3636, 10, 2863300608, 3636, 10, 2863300608, 3636, 10, 2863300608, 3636, 10, 2863300608, 3636, 10, 2863300608, 3636, 10, 2863300608, 3636, 10, 2863300608, 
3636, 10, 2863300608, 3640, 10, 2863300608, 3640, 10, 2863300608, 3640, 10, 2863300608, 3640, 10, 2863300608, 3640, 10, 2863300608, 3640, 10, 2863300608, 3640, 10, 2863300608, 3640, 10, 2863300608, 3640, 10, 2863300608, 3640, 10, 2863300608, 3640, 10, 2863300608, 4304, 174762, 2147483648, 4304, 174762, 2147483648, 4304, 174762, 2147483648, 4304, 174762, 2147483648, 4304, 174762, 2147483648, 4304, 174762, 2147483648, 4304, 174762, 2147483648, 4304, 174762, 2147483648, 4304, 174762, 2147483648, 4304, 174762, 2147483648, 4308, 174762, 2147483648, 4308, 174762, 2147483648, 4308, 174762, 2147483648, 4308, 174762, 2147483648, 4308, 174762, 2147483648, 4308, 174762, 2147483648, 4308, 174762, 2147483648, 4308, 174762, 2147483648, 4308, 174762, 2147483648, 4308, 174762, 2147483648, 4312, 174762, 2147483648, 4312, 174762, 2147483648, 4312, 174762, 2147483648, 4312, 174762, 2147483648, 4312, 174762, 2147483648, 4312, 174762, 2147483648, 4312, 174762, 2147483648, 4312, 174762, 2147483648, 4312, 174762, 2147483648, 4312, 174762, 2147483648, 4320, 174762, 2147483648, 4320, 174762, 2147483648, 4320, 174762, 2147483648, 4320, 174762, 2147483648, 4320, 174762, 2147483648, 4320, 174762, 2147483648, 4320, 174762, 2147483648, 4320, 174762, 2147483648, 4320, 174762, 2147483648, 4320, 174762, 2147483648, 4324, 174762, 2147483648, 4324, 174762, 2147483648, 4324, 174762, 2147483648, 4324, 174762, 2147483648, 4324, 174762, 2147483648, 4324, 174762, 2147483648, 4324, 174762, 2147483648, 4324, 174762, 2147483648, 4324, 174762, 2147483648, 4324, 174762, 2147483648, 4328, 174762, 2147483648, 4328, 174762, 2147483648, 4328, 174762, 2147483648, 4328, 174762, 2147483648, 4328, 174762, 2147483648, 4328, 174762, 2147483648, 4328, 174762, 2147483648, 4328, 174762, 2147483648, 4328, 174762, 2147483648, 4328, 174762, 2147483648, 4336, 174762, 2147483648, 4336, 174762, 2147483648, 4336, 174762, 2147483648, 4336, 174762, 2147483648, 4336, 174762, 2147483648, 4336, 174762, 2147483648, 4336, 174762, 
2147483648, 4336, 174762, 2147483648, 4336, 174762, 2147483648, 4336, 174762, 2147483648, 4340, 174762, 2147483648, 4340, 174762, 2147483648, 4340, 174762, 2147483648, 4340, 174762, 2147483648, 4340, 174762, 2147483648, 4340, 174762, 2147483648, 4340, 174762, 2147483648, 4340, 174762, 2147483648, 4340, 174762, 2147483648, 4340, 174762, 2147483648, 4344, 174762, 2147483648, 4344, 174762, 2147483648, 4344, 174762, 2147483648, 4344, 174762, 2147483648, 4344, 174762, 2147483648, 4344, 174762, 2147483648, 4344, 174762, 2147483648, 4344, 174762, 2147483648, 4344, 174762, 2147483648, 4344, 174762, 2147483648, 5200, 128, 0, 5216, 128, 0, 5232, 128, 0, 12224, 0, 4, 16768, 4096, 0, 17344, 1, 0, 20672, 33554432, 0, 21376, 33554432, 0, 25280, 33554432, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447382757823967_538_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447382757823967_538_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a84f44a7 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447382757823967_538_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 31)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 41))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 33)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3264, 2147483647, 0, 3008, 0, 1049096, 3008, 0, 1049096, 3008, 0, 1049096, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198, 2624, 0, 4293918198] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447383094745155_539_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447383094745155_539_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5d786253 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447383094745155_539_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,177 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 34)) { + if ((WaveGetLaneIndex() < 24)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 32)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 52)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((88 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 486 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1728, 0, 286331153, 1728, 0, 286331153, 1728, 0, 286331153, 1728, 0, 286331153, 1728, 0, 286331153, 1728, 0, 286331153, 1728, 0, 286331153, 1728, 0, 286331153, 1744, 0, 286331153, 1744, 0, 286331153, 1744, 0, 286331153, 1744, 0, 286331153, 1744, 0, 286331153, 1744, 0, 286331153, 1744, 0, 286331153, 1744, 0, 286331153, 2304, 286331153, 286331153, 2304, 286331153, 286331153, 2304, 286331153, 286331153, 2304, 286331153, 286331153, 2304, 286331153, 286331153, 2304, 286331153, 286331153, 2304, 286331153, 286331153, 2304, 286331153, 286331153, 2304, 286331153, 286331153, 2304, 286331153, 286331153, 2304, 286331153, 286331153, 2304, 286331153, 286331153, 2304, 286331153, 286331153, 2304, 286331153, 286331153, 2304, 286331153, 286331153, 2304, 286331153, 286331153, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 
2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 2624, 2004318071, 2004318071, 3072, 1048575, 0, 3072, 1048575, 0, 3072, 1048575, 0, 3072, 1048575, 0, 3072, 1048575, 0, 3072, 1048575, 0, 3072, 1048575, 0, 3072, 1048575, 0, 3072, 1048575, 0, 3072, 1048575, 0, 3072, 1048575, 0, 3072, 1048575, 0, 3072, 1048575, 0, 3072, 1048575, 0, 3072, 1048575, 0, 3072, 1048575, 0, 3072, 1048575, 0, 3072, 1048575, 0, 3072, 1048575, 0, 3072, 1048575, 0, 4432, 0, 1048576, 4448, 0, 1048576, 5648, 5, 1430257664, 5648, 5, 1430257664, 5648, 5, 1430257664, 5648, 5, 1430257664, 5648, 5, 1430257664, 5648, 5, 1430257664, 5648, 5, 1430257664, 5652, 5, 1430257664, 5652, 5, 1430257664, 5652, 5, 1430257664, 5652, 5, 1430257664, 5652, 5, 1430257664, 5652, 5, 1430257664, 5652, 5, 1430257664, 5664, 5, 1430257664, 5664, 5, 1430257664, 5664, 5, 1430257664, 5664, 5, 1430257664, 5664, 5, 1430257664, 5664, 5, 1430257664, 5664, 5, 
1430257664, 5668, 5, 1430257664, 5668, 5, 1430257664, 5668, 5, 1430257664, 5668, 5, 1430257664, 5668, 5, 1430257664, 5668, 5, 1430257664, 5668, 5, 1430257664, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765, 6208, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447413545467330_542_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447413545467330_542_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fa76abe8 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447413545467330_542_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,273 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 33)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 33)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + if ((WaveGetLaneIndex() < 23)) { + if ((WaveGetLaneIndex() < 25)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 
43)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 34)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 273 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1984, 73, 0, 1984, 73, 0, 1984, 73, 0, 4736, 272696336, 68174084, 4736, 272696336, 68174084, 4736, 272696336, 68174084, 4736, 272696336, 68174084, 4736, 272696336, 68174084, 4736, 
272696336, 68174084, 4736, 272696336, 68174084, 4736, 272696336, 68174084, 4736, 272696336, 68174084, 4736, 272696336, 68174084, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6272, 613566756, 1227133513, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 6912, 8388607, 0, 8080, 512, 0, 8096, 512, 0, 9552, 2796202, 0, 9552, 2796202, 0, 9552, 2796202, 0, 9552, 2796202, 0, 9552, 2796202, 0, 9552, 2796202, 0, 9552, 2796202, 0, 9552, 2796202, 0, 9552, 2796202, 0, 9552, 2796202, 0, 9552, 2796202, 0, 9568, 2796202, 0, 9568, 2796202, 0, 9568, 2796202, 0, 9568, 2796202, 0, 9568, 2796202, 0, 9568, 2796202, 0, 9568, 2796202, 0, 9568, 2796202, 0, 9568, 2796202, 0, 9568, 2796202, 0, 9568, 2796202, 0, 11600, 4096, 0, 11616, 4096, 0, 12816, 4210688, 0, 12816, 4210688, 0, 12832, 4210688, 0, 12832, 4210688, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447415904748408_543_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447415904748408_543_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..041a04ee --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447415904748408_543_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 32)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 55)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if 
(((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 129 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2752, 65536, 2097154, 2752, 65536, 2097154, 2752, 65536, 2097154, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 2368, 0, 4292870141, 1856, 1023, 0, 1856, 1023, 0, 1856, 1023, 0, 1856, 1023, 0, 1856, 1023, 0, 1856, 1023, 0, 1856, 1023, 0, 1856, 1023, 0, 1856, 1023, 0, 1856, 1023, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + 
Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447416352572777_544_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447416352572777_544_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e0989e5c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447416352572777_544_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,176 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() >= 40)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 37)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 18 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1408, 5, 0, 1408, 5, 0, 2688, 5, 0, 2688, 5, 0, 3776, 80, 0, 3776, 80, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447419346795503_546_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447419346795503_546_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..32b2d821 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447419346795503_546_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,256 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 23)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = 
(result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i0 << 4)); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 32)) { + if ((WaveGetLaneIndex() < 22)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 306 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 3520, 2860515328, 2863311530, 4736, 134217728, 655360, 4736, 134217728, 655360, 4736, 134217728, 655360, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 
1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 5632, 1431655765, 1431655765, 7808, 1398101, 0, 7808, 1398101, 0, 7808, 1398101, 0, 7808, 1398101, 0, 7808, 1398101, 0, 7808, 1398101, 0, 7808, 1398101, 0, 7808, 1398101, 0, 7808, 1398101, 0, 7808, 1398101, 0, 7808, 1398101, 0, 7824, 1398101, 0, 7824, 1398101, 0, 7824, 1398101, 0, 7824, 1398101, 0, 7824, 1398101, 0, 7824, 1398101, 0, 7824, 1398101, 0, 7824, 1398101, 0, 7824, 1398101, 0, 7824, 1398101, 0, 7824, 1398101, 0, 7840, 1398101, 0, 7840, 1398101, 0, 7840, 1398101, 0, 7840, 1398101, 0, 7840, 1398101, 0, 7840, 1398101, 0, 7840, 1398101, 0, 7840, 1398101, 0, 7840, 1398101, 0, 7840, 1398101, 0, 7840, 1398101, 0, 11008, 21, 0, 11008, 21, 0, 11008, 21, 0, 11024, 21, 0, 11024, 21, 0, 11024, 21, 0, 11040, 21, 0, 11040, 21, 0, 11040, 21, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: 
_wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447436295202893_548_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447436295202893_548_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8ab7dcc0 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447436295202893_548_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,288 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 44)) { + if ((WaveGetLaneIndex() >= 50)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 21)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 6))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 61))) { + if (((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 47)) { + if ((WaveGetLaneIndex() == 43)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((197 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 792 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 1152, 286331153, 286331153, 2368, 1090785345, 272696336, 2368, 1090785345, 272696336, 2368, 1090785345, 272696336, 2368, 1090785345, 272696336, 2368, 1090785345, 272696336, 2368, 1090785345, 272696336, 2368, 1090785345, 272696336, 2368, 1090785345, 272696336, 2368, 1090785345, 272696336, 2368, 1090785345, 272696336, 2368, 1090785345, 272696336, 2944, 272696336, 68174084, 2944, 272696336, 68174084, 2944, 272696336, 68174084, 2944, 272696336, 68174084, 2944, 272696336, 68174084, 2944, 272696336, 68174084, 2944, 272696336, 68174084, 2944, 272696336, 68174084, 2944, 272696336, 68174084, 2944, 272696336, 68174084, 3264, 605176100, 1092883009, 3264, 605176100, 1092883009, 3264, 605176100, 1092883009, 3264, 605176100, 1092883009, 3264, 605176100, 1092883009, 3264, 605176100, 1092883009, 3264, 605176100, 1092883009, 3264, 605176100, 1092883009, 3264, 605176100, 1092883009, 3264, 605176100, 1092883009, 3264, 605176100, 1092883009, 3264, 605176100, 1092883009, 3264, 605176100, 1092883009, 3264, 605176100, 1092883009, 3264, 605176100, 1092883009, 3264, 605176100, 1092883009, 3712, 559240, 0, 3712, 559240, 0, 3712, 559240, 0, 3712, 
559240, 0, 3712, 559240, 0, 4608, 0, 4294705152, 4608, 0, 4294705152, 4608, 0, 4294705152, 4608, 0, 4294705152, 4608, 0, 4294705152, 4608, 0, 4294705152, 4608, 0, 4294705152, 4608, 0, 4294705152, 4608, 0, 4294705152, 4608, 0, 4294705152, 4608, 0, 4294705152, 4608, 0, 4294705152, 4608, 0, 4294705152, 4608, 0, 4294705152, 7616, 80, 0, 7616, 80, 0, 8528, 511, 0, 8528, 511, 0, 8528, 511, 0, 8528, 511, 0, 8528, 511, 0, 8528, 511, 0, 8528, 511, 0, 8528, 511, 0, 8528, 511, 0, 8544, 511, 0, 8544, 511, 0, 8544, 511, 0, 8544, 511, 0, 8544, 511, 0, 8544, 511, 0, 8544, 511, 0, 8544, 511, 0, 8544, 511, 0, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11600, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 11616, 1431639381, 1365, 12628, 1431639381, 1365, 12628, 1431639381, 1365, 12628, 1431639381, 1365, 12628, 1431639381, 1365, 12628, 1431639381, 1365, 12628, 1431639381, 1365, 12628, 1431639381, 1365, 12628, 1431639381, 1365, 12628, 1431639381, 1365, 12628, 1431639381, 1365, 12628, 1431639381, 1365, 12628, 1431639381, 1365, 12628, 1431639381, 1365, 12628, 1431639381, 1365, 
12628, 1431639381, 1365, 12628, 1431639381, 1365, 12628, 1431639381, 1365, 12628, 1431639381, 1365, 12628, 1431639381, 1365, 12628, 1431639381, 1365, 12628, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12632, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12644, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 12648, 1431639381, 1365, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 
13200, 2863311530, 2730, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 13200, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730, 13216, 2863311530, 2730] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447463552635130_549_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447463552635130_549_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cac09fc1 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447463552635130_549_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,374 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((29 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 35)) { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 55)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 0))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 54))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() 
== 38)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 48))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 13))) { + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(result)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (350 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (365 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (376 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 36))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (393 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (402 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (407 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (411 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (434 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 52)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (444 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (459 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (466 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 
threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 177 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1860, 17301504, 8, 1860, 17301504, 8, 1860, 17301504, 8, 1864, 17301504, 8, 1864, 17301504, 8, 1864, 17301504, 8, 1876, 17301504, 8, 1876, 17301504, 8, 1876, 17301504, 8, 1880, 17301504, 8, 1880, 17301504, 8, 1880, 17301504, 8, 3332, 3, 0, 3332, 3, 0, 3336, 3, 0, 3336, 3, 0, 3348, 3, 0, 3348, 3, 0, 3352, 3, 0, 3352, 3, 0, 5504, 272696336, 68174084, 5504, 272696336, 68174084, 5504, 272696336, 68174084, 5504, 272696336, 68174084, 5504, 272696336, 68174084, 5504, 272696336, 68174084, 5504, 272696336, 68174084, 5504, 272696336, 68174084, 5504, 272696336, 68174084, 5504, 272696336, 68174084, 12752, 0, 2147483648, 12768, 0, 2147483648, 12784, 0, 2147483648, 14416, 4369, 285212672, 14416, 4369, 285212672, 14416, 4369, 285212672, 14416, 4369, 285212672, 14416, 4369, 285212672, 14416, 4369, 285212672, 14432, 4369, 285212672, 14432, 4369, 285212672, 14432, 4369, 285212672, 14432, 4369, 285212672, 14432, 4369, 285212672, 14432, 4369, 285212672, 14448, 4369, 285212672, 14448, 4369, 285212672, 14448, 4369, 285212672, 14448, 4369, 285212672, 14448, 4369, 285212672, 14448, 4369, 285212672, 24064, 32, 0, 29376, 64, 262144, 29376, 64, 262144, 29824, 559240, 0, 29824, 559240, 0, 29824, 559240, 0, 29824, 559240, 0, 29824, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447487069783237_550_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447487069783237_550_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e8138671 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447487069783237_550_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,265 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 11))) { + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 
26))) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 56)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((118 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((128 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((137 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((counter2 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 33)) { + if ((WaveGetLaneIndex() < 32)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 49)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 55)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((211 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 35)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((218 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 32)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 45)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (282 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [11072, 0, 256, 12624, 0, 131072, 12640, 0, 131072, 13524, 0, 8388608, 13528, 0, 8388608, 13532, 0, 8388608, 13540, 0, 8388608, 13544, 0, 8388608, 13548, 0, 8388608, 13972, 0, 8, 13976, 0, 8, 13980, 0, 8, 13988, 0, 8, 13992, 0, 8, 13996, 0, 8, 15104, 1023, 0, 15104, 1023, 0, 15104, 1023, 0, 15104, 1023, 0, 15104, 1023, 0, 15104, 1023, 0, 15104, 1023, 0, 15104, 1023, 0, 15104, 1023, 0, 15104, 1023, 0, 18048, 8191, 0, 18048, 8191, 0, 18048, 8191, 0, 18048, 8191, 0, 18048, 8191, 0, 18048, 8191, 0, 18048, 8191, 0, 18048, 8191, 0, 18048, 8191, 0, 18048, 8191, 0, 18048, 8191, 0, 18048, 8191, 0, 18048, 8191, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447509352006664_551_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447509352006664_551_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..eb91c345 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447509352006664_551_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,139 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 26)) { + if ((WaveGetLaneIndex() >= 58)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 49)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) 
{ + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 63 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 4160, 559240, 
0, 4160, 559240, 0, 4160, 559240, 0, 4160, 559240, 0, 4160, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447509825654948_552_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447509825654948_552_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3b5e3055 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447509825654948_552_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,213 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() < 26)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 29)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 57)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 
<< 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 210 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1984, 4, 0, 2432, 4, 0, 6528, 32768, 0, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 6272, 2863278762, 2863311530, 5888, 0, 1409286144, 5888, 0, 1409286144, 5888, 0, 1409286144, 5504, 87381, 0, 5504, 87381, 0, 5504, 87381, 0, 5504, 87381, 0, 5504, 87381, 0, 5504, 87381, 0, 5504, 87381, 0, 5504, 87381, 0, 5504, 87381, 0, 7168, 85, 0, 7168, 85, 0, 7168, 85, 
0, 7168, 85, 0, 8896, 2147483650, 0, 8896, 2147483650, 0, 8912, 2147483650, 0, 8912, 2147483650, 0, 9856, 134217728, 536870912, 9856, 134217728, 536870912, 9872, 134217728, 536870912, 9872, 134217728, 536870912] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756447572056878155_555_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756447572056878155_555_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ce2c2794 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756447572056878155_555_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,162 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 26))) { + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 51))) { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 51))) { + if (((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((149 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 27 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3904, 128, 0, 3920, 128, 0, 5060, 0, 4194304, 5064, 0, 4194304, 5076, 0, 4194304, 5080, 0, 4194304, 5760, 0, 4194304, 5776, 0, 4194304, 6720, 128, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + 
Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448016513202716_558_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448016513202716_558_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..36bc17e9 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448016513202716_558_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,100 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 903 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2000, 10240, 8192, 2000, 10240, 8192, 2000, 10240, 8192, 2016, 10240, 8192, 2016, 10240, 8192, 2016, 10240, 8192, 2032, 10240, 8192, 2032, 10240, 8192, 2032, 10240, 8192, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3156, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 
2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3160, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3164, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 
2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3172, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3176, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 
2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3180, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3188, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 
2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3192, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530, 3196, 2863311530, 2863311530] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 
+ Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448021035920133_559_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448021035920133_559_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b263210e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448021035920133_559_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,292 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 52))) { + if (((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 46))) { + if (((WaveGetLaneIndex() < 14) || 
(WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 32)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 52)) { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 57)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if ((WaveGetLaneIndex() == 44)) { + if ((WaveGetLaneIndex() == 47)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 46)) { + if ((WaveGetLaneIndex() < 24)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((264 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4416, 64, 0, 4432, 64, 0, 10512, 0, 603979776, 10512, 0, 603979776, 10528, 0, 603979776, 10528, 0, 603979776, 10544, 0, 603979776, 10544, 0, 603979776, 16912, 0, 1227128832, 16912, 0, 1227128832, 16912, 0, 1227128832, 16912, 0, 1227128832, 16912, 0, 1227128832, 16912, 0, 1227128832, 16928, 0, 1227128832, 16928, 0, 1227128832, 16928, 0, 1227128832, 16928, 0, 1227128832, 16928, 0, 1227128832, 16928, 0, 1227128832, 16944, 0, 1227128832, 16944, 0, 1227128832, 16944, 0, 1227128832, 16944, 0, 1227128832, 16944, 0, 1227128832, 16944, 0, 1227128832] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448026038314873_560_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448026038314873_560_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..42013116 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448026038314873_560_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,227 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 54)) { 
+ uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter1 == 2)) { + break; + } + } + break; + } + case 3: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 43)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (i3 << 4)) | (counter4 << 2)); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((132 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((137 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((144 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((157 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + break; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 360 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 286331153, 286331153, 1216, 286331153, 286331153, 1216, 286331153, 286331153, 1216, 286331153, 286331153, 1216, 286331153, 286331153, 1216, 286331153, 286331153, 1216, 286331153, 286331153, 1216, 286331153, 286331153, 1216, 286331153, 286331153, 1216, 286331153, 286331153, 1216, 286331153, 286331153, 1216, 286331153, 286331153, 1216, 286331153, 286331153, 1216, 286331153, 286331153, 1216, 286331153, 286331153, 1216, 286331153, 286331153, 1232, 286331153, 286331153, 1232, 286331153, 286331153, 1232, 286331153, 286331153, 1232, 286331153, 286331153, 1232, 286331153, 286331153, 1232, 286331153, 286331153, 1232, 286331153, 286331153, 1232, 286331153, 286331153, 1232, 286331153, 286331153, 1232, 286331153, 286331153, 1232, 286331153, 286331153, 1232, 286331153, 286331153, 1232, 286331153, 286331153, 1232, 286331153, 286331153, 1232, 286331153, 286331153, 1232, 286331153, 286331153, 1248, 286331153, 286331153, 1248, 286331153, 286331153, 1248, 286331153, 286331153, 1248, 286331153, 286331153, 1248, 286331153, 286331153, 1248, 286331153, 286331153, 1248, 286331153, 286331153, 1248, 286331153, 286331153, 1248, 286331153, 286331153, 1248, 286331153, 286331153, 1248, 286331153, 286331153, 1248, 286331153, 286331153, 1248, 286331153, 286331153, 1248, 286331153, 286331153, 1248, 286331153, 286331153, 1248, 286331153, 286331153, 2832, 572662306, 572662306, 2832, 572662306, 572662306, 2832, 572662306, 572662306, 2832, 572662306, 572662306, 2832, 572662306, 572662306, 2832, 572662306, 572662306, 2832, 572662306, 572662306, 2832, 572662306, 572662306, 2832, 572662306, 572662306, 2832, 572662306, 572662306, 2832, 572662306, 572662306, 2832, 572662306, 572662306, 2832, 572662306, 572662306, 2832, 
572662306, 572662306, 2832, 572662306, 572662306, 2832, 572662306, 572662306, 2848, 572662306, 572662306, 2848, 572662306, 572662306, 2848, 572662306, 572662306, 2848, 572662306, 572662306, 2848, 572662306, 572662306, 2848, 572662306, 572662306, 2848, 572662306, 572662306, 2848, 572662306, 572662306, 2848, 572662306, 572662306, 2848, 572662306, 572662306, 2848, 572662306, 572662306, 2848, 572662306, 572662306, 2848, 572662306, 572662306, 2848, 572662306, 572662306, 2848, 572662306, 572662306, 2848, 572662306, 572662306, 5824, 0, 2290649088, 5824, 0, 2290649088, 5824, 0, 2290649088, 5824, 0, 2290649088, 5824, 0, 2290649088, 5824, 0, 2290649088, 5840, 0, 2290649088, 5840, 0, 2290649088, 5840, 0, 2290649088, 5840, 0, 2290649088, 5840, 0, 2290649088, 5840, 0, 2290649088, 7236, 0, 134217728, 7240, 0, 134217728, 7252, 0, 134217728, 7256, 0, 134217728, 9220, 559240, 0, 9220, 559240, 0, 9220, 559240, 0, 9220, 559240, 0, 9220, 559240, 0, 9224, 559240, 0, 9224, 559240, 0, 9224, 559240, 0, 9224, 559240, 0, 9224, 559240, 0, 9236, 559240, 0, 9236, 559240, 0, 9236, 559240, 0, 9236, 559240, 0, 9236, 559240, 0, 9240, 559240, 0, 9240, 559240, 0, 9240, 559240, 0, 9240, 559240, 0, 9240, 559240, 0, 10496, 136, 0, 10496, 136, 0, 10512, 136, 0, 10512, 136, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448062858150765_561_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448062858150765_561_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..33863bfd --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448062858150765_561_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,143 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 41))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((72 << 6) | (counter0 << 4)) | (i1 << 2)) | i2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((95 << 6) | (counter0 << 4)) | (i1 << 2)) | i2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((107 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((111 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single 
dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 81 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1360, 2147549184, 16384, 1360, 2147549184, 16384, 1360, 2147549184, 16384, 2704, 256, 512, 2704, 256, 512, 2708, 256, 512, 2708, 256, 512, 4624, 4096, 0, 4625, 4096, 0, 4626, 4096, 0, 4628, 4096, 0, 4629, 4096, 0, 4630, 4096, 0, 6096, 69632, 0, 6096, 69632, 0, 6097, 69632, 0, 6097, 69632, 0, 6098, 69632, 0, 6098, 69632, 0, 6100, 69632, 0, 6100, 69632, 0, 6101, 69632, 0, 6101, 69632, 0, 6102, 69632, 0, 6102, 69632, 0, 7824, 4, 33554432, 7824, 4, 33554432] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448082279678059_562_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448082279678059_562_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..979e1193 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448082279678059_562_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,377 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 50))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 33)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || 
(WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 48))) { + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 54))) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 53)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((297 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (318 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((340 << 6) | 
(counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 51))) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((372 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((385 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((398 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 516 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2112, 537002016, 2097664, 2112, 537002016, 2097664, 2112, 537002016, 2097664, 2112, 537002016, 2097664, 2112, 537002016, 2097664, 4944, 0, 262208, 4944, 0, 262208, 4960, 0, 262208, 4960, 0, 262208, 7056, 2184, 2290647040, 
7056, 2184, 2290647040, 7056, 2184, 2290647040, 7056, 2184, 2290647040, 7056, 2184, 2290647040, 7056, 2184, 2290647040, 7056, 2184, 2290647040, 7056, 2184, 2290647040, 7072, 2184, 2290647040, 7072, 2184, 2290647040, 7072, 2184, 2290647040, 7072, 2184, 2290647040, 7072, 2184, 2290647040, 7072, 2184, 2290647040, 7072, 2184, 2290647040, 7072, 2184, 2290647040, 9680, 34824, 2290614272, 9680, 34824, 2290614272, 9680, 34824, 2290614272, 9680, 34824, 2290614272, 9680, 34824, 2290614272, 9680, 34824, 2290614272, 9680, 34824, 2290614272, 9696, 34824, 2290614272, 9696, 34824, 2290614272, 9696, 34824, 2290614272, 9696, 34824, 2290614272, 9696, 34824, 2290614272, 9696, 34824, 2290614272, 9696, 34824, 2290614272, 10384, 8, 2290649088, 10384, 8, 2290649088, 10384, 8, 2290649088, 10384, 8, 2290649088, 10384, 8, 2290649088, 10384, 8, 2290649088, 10384, 8, 2290649088, 10400, 8, 2290649088, 10400, 8, 2290649088, 10400, 8, 2290649088, 10400, 8, 2290649088, 10400, 8, 2290649088, 10400, 8, 2290649088, 10400, 8, 2290649088, 11088, 8, 2147483648, 11088, 8, 2147483648, 11104, 8, 2147483648, 11104, 8, 2147483648, 12096, 73, 0, 12096, 73, 0, 12096, 73, 0, 14080, 272696336, 68174084, 14080, 272696336, 68174084, 14080, 272696336, 68174084, 14080, 272696336, 68174084, 14080, 272696336, 68174084, 14080, 272696336, 68174084, 14080, 272696336, 68174084, 14080, 272696336, 68174084, 14080, 272696336, 68174084, 14080, 272696336, 68174084, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21776, 4095, 4286578688, 21792, 4095, 4286578688, 21792, 4095, 
4286578688, 21792, 4095, 4286578688, 21792, 4095, 4286578688, 21792, 4095, 4286578688, 21792, 4095, 4286578688, 21792, 4095, 4286578688, 21792, 4095, 4286578688, 21792, 4095, 4286578688, 21792, 4095, 4286578688, 21792, 4095, 4286578688, 21792, 4095, 4286578688, 21792, 4095, 4286578688, 21792, 4095, 4286578688, 21792, 4095, 4286578688, 21792, 4095, 4286578688, 21792, 4095, 4286578688, 21792, 4095, 4286578688, 21792, 4095, 4286578688, 21792, 4095, 4286578688, 21792, 4095, 4286578688, 23824, 2147483648, 524288, 23824, 2147483648, 524288, 23840, 2147483648, 524288, 23840, 2147483648, 524288, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25488, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144, 25504, 65535, 4292870144] 
+ - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448182758055198_564_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448182758055198_564_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1487dbfb --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448182758055198_564_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,180 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if 
(true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 43))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 60)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 63)) { + if ((WaveGetLaneIndex() == 62)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if 
((WaveGetLaneIndex() >= 63)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 60)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 4096, 0, 2147483648, 4112, 0, 2147483648, 4736, 85, 0, 4736, 85, 0, 4736, 85, 0, 4736, 85, 0, 6464, 0, 2147483648, 6480, 0, 2147483648, 6912, 0, 2147483648, 6928, 0, 2147483648] + - 
Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448193707149436_567_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448193707149436_567_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b86e7c24 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448193707149436_567_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,252 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 58))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 54))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 
58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 54))) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 31)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((143 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 61))) { + if 
(((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 255 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 
511, 4227858432, 1216, 511, 4227858432, 1216, 511, 4227858432, 1216, 511, 4227858432, 1216, 511, 4227858432, 1216, 511, 4227858432, 1216, 511, 4227858432, 1216, 511, 4227858432, 1216, 511, 4227858432, 1216, 511, 4227858432, 1216, 511, 4227858432, 1216, 511, 4227858432, 1216, 511, 4227858432, 1216, 511, 4227858432, 1216, 511, 4227858432, 2560, 16, 0, 6272, 495, 4227858432, 6272, 495, 4227858432, 6272, 495, 4227858432, 6272, 495, 4227858432, 6272, 495, 4227858432, 6272, 495, 4227858432, 6272, 495, 4227858432, 6272, 495, 4227858432, 6272, 495, 4227858432, 6272, 495, 4227858432, 6272, 495, 4227858432, 6272, 495, 4227858432, 6272, 495, 4227858432, 6272, 495, 4227858432, 6272, 16, 0, 8192, 526336, 0, 8192, 526336, 0, 8208, 526336, 0, 8208, 526336, 0, 9152, 0, 4194304, 9156, 0, 4194304, 9160, 0, 4194304, 9168, 0, 4194304, 9172, 0, 4194304, 9176, 0, 4194304, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 10880, 2863311530, 2863311530, 12032, 170, 2684354560, 12032, 170, 2684354560, 12032, 170, 2684354560, 12032, 170, 2684354560, 12032, 170, 2684354560, 12032, 170, 2684354560, 
14336, 170, 2684354560, 14336, 170, 2684354560, 14336, 170, 2684354560, 14336, 170, 2684354560, 14336, 170, 2684354560, 14336, 170, 2684354560] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448194848498681_568_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448194848498681_568_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5e6e94ca --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448194848498681_568_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,418 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 22)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((120 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((127 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((146 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((206 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((215 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((220 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((227 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((231 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((242 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (273 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 0)) { + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((323 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (334 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i7 = 0; (i7 < 3); i7 = (i7 + 1)) { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((351 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 43)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((358 << 6) | (i7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (362 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 573 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 2752, 1, 0, 7684, 1048832, 0, 7684, 1048832, 0, 7688, 1048832, 0, 7688, 1048832, 0, 7692, 1048832, 0, 7692, 1048832, 0, 7700, 1048832, 0, 7700, 1048832, 0, 7704, 1048832, 0, 7704, 1048832, 0, 7708, 1048832, 0, 7708, 1048832, 0, 8132, 256, 0, 8136, 256, 0, 8140, 256, 0, 8148, 256, 0, 8152, 256, 0, 8156, 256, 0, 12096, 67108928, 262144, 12096, 67108928, 262144, 12096, 67108928, 262144, 12112, 67108928, 262144, 12112, 
67108928, 262144, 12112, 67108928, 262144, 14084, 1145324612, 1145324612, 14084, 1145324612, 1145324612, 14084, 1145324612, 1145324612, 14084, 1145324612, 1145324612, 14084, 1145324612, 1145324612, 14084, 1145324612, 1145324612, 14084, 1145324612, 1145324612, 14084, 1145324612, 1145324612, 14084, 1145324612, 1145324612, 14084, 1145324612, 1145324612, 14084, 1145324612, 1145324612, 14084, 1145324612, 1145324612, 14084, 1145324612, 1145324612, 14084, 1145324612, 1145324612, 14084, 1145324612, 1145324612, 14084, 1145324612, 1145324612, 14088, 1145324612, 1145324612, 14088, 1145324612, 1145324612, 14088, 1145324612, 1145324612, 14088, 1145324612, 1145324612, 14088, 1145324612, 1145324612, 14088, 1145324612, 1145324612, 14088, 1145324612, 1145324612, 14088, 1145324612, 1145324612, 14088, 1145324612, 1145324612, 14088, 1145324612, 1145324612, 14088, 1145324612, 1145324612, 14088, 1145324612, 1145324612, 14088, 1145324612, 1145324612, 14088, 1145324612, 1145324612, 14088, 1145324612, 1145324612, 14088, 1145324612, 1145324612, 14100, 1145324612, 1145324612, 14100, 1145324612, 1145324612, 14100, 1145324612, 1145324612, 14100, 1145324612, 1145324612, 14100, 1145324612, 1145324612, 14100, 1145324612, 1145324612, 14100, 1145324612, 1145324612, 14100, 1145324612, 1145324612, 14100, 1145324612, 1145324612, 14100, 1145324612, 1145324612, 14100, 1145324612, 1145324612, 14100, 1145324612, 1145324612, 14100, 1145324612, 1145324612, 14100, 1145324612, 1145324612, 14100, 1145324612, 1145324612, 14100, 1145324612, 1145324612, 14104, 1145324612, 1145324612, 14104, 1145324612, 1145324612, 14104, 1145324612, 1145324612, 14104, 1145324612, 1145324612, 14104, 1145324612, 1145324612, 14104, 1145324612, 1145324612, 14104, 1145324612, 1145324612, 14104, 1145324612, 1145324612, 14104, 1145324612, 1145324612, 14104, 1145324612, 1145324612, 14104, 1145324612, 1145324612, 14104, 1145324612, 1145324612, 14104, 1145324612, 1145324612, 14104, 1145324612, 1145324612, 14104, 1145324612, 1145324612, 
14104, 1145324612, 1145324612, 15492, 68, 1145324544, 15492, 68, 1145324544, 15492, 68, 1145324544, 15492, 68, 1145324544, 15492, 68, 1145324544, 15492, 68, 1145324544, 15492, 68, 1145324544, 15492, 68, 1145324544, 15496, 68, 1145324544, 15496, 68, 1145324544, 15496, 68, 1145324544, 15496, 68, 1145324544, 15496, 68, 1145324544, 15496, 68, 1145324544, 15496, 68, 1145324544, 15496, 68, 1145324544, 15508, 68, 1145324544, 15508, 68, 1145324544, 15508, 68, 1145324544, 15508, 68, 1145324544, 15508, 68, 1145324544, 15508, 68, 1145324544, 15508, 68, 1145324544, 15508, 68, 1145324544, 15512, 68, 1145324544, 15512, 68, 1145324544, 15512, 68, 1145324544, 15512, 68, 1145324544, 15512, 68, 1145324544, 15512, 68, 1145324544, 15512, 68, 1145324544, 15512, 68, 1145324544, 15936, 559240, 0, 15936, 559240, 0, 15936, 559240, 0, 15936, 559240, 0, 15936, 559240, 0, 16576, 17, 0, 16576, 17, 0, 17472, 1145324612, 1145324612, 17472, 1145324612, 1145324612, 17472, 1145324612, 1145324612, 17472, 1145324612, 1145324612, 17472, 1145324612, 1145324612, 17472, 1145324612, 1145324612, 17472, 1145324612, 1145324612, 17472, 1145324612, 1145324612, 17472, 1145324612, 1145324612, 17472, 1145324612, 1145324612, 17472, 1145324612, 1145324612, 17472, 1145324612, 1145324612, 17472, 1145324612, 1145324612, 17472, 1145324612, 1145324612, 17472, 1145324612, 1145324612, 17472, 1145324612, 1145324612, 22464, 136, 0, 22464, 136, 0, 22480, 136, 0, 22480, 136, 0, 22496, 136, 0, 22496, 136, 0, 22912, 0, 2290649088, 22912, 0, 2290649088, 22912, 0, 2290649088, 22912, 0, 2290649088, 22912, 0, 2290649088, 22912, 0, 2290649088, 22928, 0, 2290649088, 22928, 0, 2290649088, 22928, 0, 2290649088, 22928, 0, 2290649088, 22928, 0, 2290649088, 22928, 0, 2290649088, 22944, 0, 2290649088, 22944, 0, 2290649088, 22944, 0, 2290649088, 22944, 0, 2290649088, 22944, 0, 2290649088, 22944, 0, 2290649088] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448252064742481_569_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448252064742481_569_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0b1d13a8 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448252064742481_569_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,250 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 5)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 
= 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } + } + } + if ((WaveGetLaneIndex() == 2)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 49))) { + if (((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 45))) { + if (((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 63)) { + if ((WaveGetLaneIndex() == 57)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() >= 46)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (counter2 << 4)); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((225 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((234 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 
threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5440, 131328, 139264, 5440, 131328, 139264, 5440, 131328, 139264, 5440, 131328, 139264, 10560, 134217728, 0, 11520, 2129920, 67108864, 11520, 2129920, 67108864, 11520, 2129920, 67108864] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448253493129439_570_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448253493129439_570_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c981618c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448253493129439_570_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,205 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((25 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 46)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((32 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 31)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 45)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 22)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + break; + } + } + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 29))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((169 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 62))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 426 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 37449, 0, 1600, 37449, 0, 1600, 37449, 0, 1600, 37449, 0, 1600, 37449, 0, 1600, 37449, 0, 1604, 37449, 0, 1604, 37449, 0, 1604, 37449, 0, 1604, 37449, 0, 1604, 37449, 0, 1604, 37449, 0, 1608, 37449, 0, 1608, 37449, 0, 1608, 37449, 0, 1608, 37449, 0, 1608, 37449, 0, 1608, 37449, 0, 1616, 37449, 0, 1616, 37449, 0, 1616, 37449, 0, 1616, 37449, 0, 1616, 37449, 0, 1616, 37449, 0, 1620, 37449, 0, 1620, 37449, 0, 1620, 37449, 0, 1620, 37449, 0, 1620, 37449, 0, 1620, 37449, 0, 1624, 37449, 0, 1624, 37449, 0, 1624, 37449, 0, 1624, 37449, 0, 1624, 37449, 0, 1624, 37449, 0, 2048, 0, 2454257664, 2048, 0, 2454257664, 2048, 0, 2454257664, 2048, 0, 2454257664, 2048, 0, 2454257664, 2048, 0, 2454257664, 2052, 0, 2454257664, 2052, 0, 2454257664, 2052, 0, 2454257664, 2052, 0, 2454257664, 2052, 0, 2454257664, 2052, 0, 2454257664, 2056, 0, 2454257664, 2056, 0, 2454257664, 2056, 0, 2454257664, 2056, 0, 2454257664, 2056, 0, 2454257664, 2056, 0, 2454257664, 2064, 0, 2454257664, 2064, 0, 
2454257664, 2064, 0, 2454257664, 2064, 0, 2454257664, 2064, 0, 2454257664, 2064, 0, 2454257664, 2068, 0, 2454257664, 2068, 0, 2454257664, 2068, 0, 2454257664, 2068, 0, 2454257664, 2068, 0, 2454257664, 2068, 0, 2454257664, 2072, 0, 2454257664, 2072, 0, 2454257664, 2072, 0, 2454257664, 2072, 0, 2454257664, 2072, 0, 2454257664, 2072, 0, 2454257664, 2816, 272696336, 68174084, 2816, 272696336, 68174084, 2816, 272696336, 68174084, 2816, 272696336, 68174084, 2816, 272696336, 68174084, 2816, 272696336, 68174084, 2816, 272696336, 68174084, 2816, 272696336, 68174084, 2816, 272696336, 68174084, 2816, 272696336, 68174084, 3776, 613566756, 0, 3776, 613566756, 0, 3776, 613566756, 0, 3776, 613566756, 0, 3776, 613566756, 0, 3776, 613566756, 0, 3776, 613566756, 0, 3776, 613566756, 0, 3776, 613566756, 0, 3776, 613566756, 0, 3792, 613566756, 0, 3792, 613566756, 0, 3792, 613566756, 0, 3792, 613566756, 0, 3792, 613566756, 0, 3792, 613566756, 0, 3792, 613566756, 0, 3792, 613566756, 0, 3792, 613566756, 0, 3792, 613566756, 0, 5120, 545392672, 520, 5120, 545392672, 520, 5120, 545392672, 520, 5120, 545392672, 520, 5120, 545392672, 520, 5120, 545392672, 520, 5120, 545392672, 520, 5136, 545392672, 520, 5136, 545392672, 520, 5136, 545392672, 520, 5136, 545392672, 520, 5136, 545392672, 520, 5136, 545392672, 520, 5136, 545392672, 520, 5568, 1198372, 0, 5568, 1198372, 0, 5568, 1198372, 0, 5568, 1198372, 0, 5568, 1198372, 0, 5568, 1198372, 0, 5568, 1198372, 0, 5584, 1198372, 0, 5584, 1198372, 0, 5584, 1198372, 0, 5584, 1198372, 0, 5584, 1198372, 0, 5584, 1198372, 0, 5584, 1198372, 0, 10820, 268436480, 0, 10820, 268436480, 0, 10824, 268436480, 0, 10824, 268436480, 0, 10836, 268436480, 0, 10836, 268436480, 0, 10840, 268436480, 0, 10840, 268436480, 0, 12480, 268436480, 0, 12480, 268436480, 0, 12496, 268436480, 0, 12496, 268436480, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448344188281072_571_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448344188281072_571_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..36b65558 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448344188281072_571_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,325 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 55))) { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() 
& 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 53))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 37)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 19)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((180 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 61))) { + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((265 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
if ((WaveGetLaneIndex() == 60)) { + if ((WaveGetLaneIndex() == 55)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((277 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((300 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((310 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((319 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((324 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) 
{ + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((331 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 348 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 4095, 4278190080, 1088, 4095, 4278190080, 1088, 4095, 4278190080, 1088, 4095, 4278190080, 1088, 4095, 4278190080, 1088, 4095, 4278190080, 1088, 4095, 4278190080, 1088, 4095, 4278190080, 1088, 4095, 4278190080, 1088, 4095, 4278190080, 1088, 4095, 4278190080, 1088, 4095, 4278190080, 1088, 4095, 4278190080, 1088, 4095, 4278190080, 1088, 4095, 4278190080, 1088, 4095, 4278190080, 1088, 4095, 4278190080, 1088, 4095, 4278190080, 1088, 4095, 4278190080, 1088, 4095, 4278190080, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 1920, 8191, 4286578688, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 
7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 7616, 1145324612, 1145324612, 13376, 1, 3758096384, 13376, 1, 3758096384, 13376, 1, 3758096384, 13376, 1, 3758096384, 13392, 1, 3758096384, 13392, 1, 3758096384, 13392, 1, 3758096384, 13392, 1, 3758096384, 19204, 0, 536870912, 19208, 0, 536870912, 19212, 0, 536870912, 19220, 0, 536870912, 19224, 0, 536870912, 19228, 0, 536870912, 19840, 17, 0, 19840, 17, 0, 19856, 17, 0, 19856, 17, 0, 20736, 1145324608, 1145324612, 20736, 1145324608, 1145324612, 20736, 1145324608, 1145324612, 20736, 1145324608, 1145324612, 20736, 1145324608, 1145324612, 20736, 1145324608, 1145324612, 20736, 1145324608, 1145324612, 20736, 1145324608, 1145324612, 20736, 1145324608, 1145324612, 20736, 1145324608, 1145324612, 20736, 1145324608, 1145324612, 20736, 1145324608, 1145324612, 20736, 1145324608, 1145324612, 20736, 1145324608, 1145324612, 20736, 1145324608, 1145324612, 20752, 1145324608, 1145324612, 20752, 1145324608, 1145324612, 20752, 1145324608, 1145324612, 20752, 1145324608, 1145324612, 20752, 1145324608, 1145324612, 20752, 1145324608, 1145324612, 20752, 1145324608, 1145324612, 20752, 1145324608, 1145324612, 20752, 1145324608, 1145324612, 20752, 1145324608, 1145324612, 20752, 1145324608, 1145324612, 20752, 1145324608, 1145324612, 20752, 1145324608, 1145324612, 20752, 1145324608, 1145324612, 20752, 1145324608, 1145324612, 21184, 559240, 0, 21184, 559240, 0, 21184, 559240, 0, 21184, 559240, 0, 21184, 559240, 0, 21200, 559240, 0, 21200, 559240, 0, 21200, 559240, 0, 21200, 559240, 0, 21200, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448360402520546_572_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448360402520546_572_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..23d4645b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448360402520546_572_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,248 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 53))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (i1 << 2)); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 40))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 28)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 40)) { + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 11))) { 
+ result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((212 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((231 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 684 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 15, 4292870144, 1088, 15, 4292870144, 1088, 15, 4292870144, 1088, 15, 4292870144, 1088, 15, 4292870144, 1088, 15, 4292870144, 1088, 15, 4292870144, 1088, 15, 4292870144, 1088, 15, 4292870144, 1088, 15, 4292870144, 1088, 15, 4292870144, 1088, 15, 4292870144, 1088, 15, 4292870144, 1088, 15, 4292870144, 1088, 15, 4292870144, 2512, 64, 0, 2528, 64, 0, 2544, 64, 0, 3600, 65535, 0, 3600, 65535, 0, 3600, 65535, 0, 3600, 65535, 0, 3600, 65535, 0, 3600, 65535, 0, 3600, 65535, 0, 3600, 65535, 0, 3600, 65535, 0, 3600, 65535, 0, 3600, 
65535, 0, 3600, 65535, 0, 3600, 65535, 0, 3600, 65535, 0, 3600, 65535, 0, 3600, 65535, 0, 3604, 65535, 0, 3604, 65535, 0, 3604, 65535, 0, 3604, 65535, 0, 3604, 65535, 0, 3604, 65535, 0, 3604, 65535, 0, 3604, 65535, 0, 3604, 65535, 0, 3604, 65535, 0, 3604, 65535, 0, 3604, 65535, 0, 3604, 65535, 0, 3604, 65535, 0, 3604, 65535, 0, 3604, 65535, 0, 3608, 65535, 0, 3608, 65535, 0, 3608, 65535, 0, 3608, 65535, 0, 3608, 65535, 0, 3608, 65535, 0, 3608, 65535, 0, 3608, 65535, 0, 3608, 65535, 0, 3608, 65535, 0, 3608, 65535, 0, 3608, 65535, 0, 3608, 65535, 0, 3608, 65535, 0, 3608, 65535, 0, 3608, 65535, 0, 3616, 65535, 0, 3616, 65535, 0, 3616, 65535, 0, 3616, 65535, 0, 3616, 65535, 0, 3616, 65535, 0, 3616, 65535, 0, 3616, 65535, 0, 3616, 65535, 0, 3616, 65535, 0, 3616, 65535, 0, 3616, 65535, 0, 3616, 65535, 0, 3616, 65535, 0, 3616, 65535, 0, 3616, 65535, 0, 3620, 65535, 0, 3620, 65535, 0, 3620, 65535, 0, 3620, 65535, 0, 3620, 65535, 0, 3620, 65535, 0, 3620, 65535, 0, 3620, 65535, 0, 3620, 65535, 0, 3620, 65535, 0, 3620, 65535, 0, 3620, 65535, 0, 3620, 65535, 0, 3620, 65535, 0, 3620, 65535, 0, 3620, 65535, 0, 3624, 65535, 0, 3624, 65535, 0, 3624, 65535, 0, 3624, 65535, 0, 3624, 65535, 0, 3624, 65535, 0, 3624, 65535, 0, 3624, 65535, 0, 3624, 65535, 0, 3624, 65535, 0, 3624, 65535, 0, 3624, 65535, 0, 3624, 65535, 0, 3624, 65535, 0, 3624, 65535, 0, 3624, 65535, 0, 3632, 65535, 0, 3632, 65535, 0, 3632, 65535, 0, 3632, 65535, 0, 3632, 65535, 0, 3632, 65535, 0, 3632, 65535, 0, 3632, 65535, 0, 3632, 65535, 0, 3632, 65535, 0, 3632, 65535, 0, 3632, 65535, 0, 3632, 65535, 0, 3632, 65535, 0, 3632, 65535, 0, 3632, 65535, 0, 3636, 65535, 0, 3636, 65535, 0, 3636, 65535, 0, 3636, 65535, 0, 3636, 65535, 0, 3636, 65535, 0, 3636, 65535, 0, 3636, 65535, 0, 3636, 65535, 0, 3636, 65535, 0, 3636, 65535, 0, 3636, 65535, 0, 3636, 65535, 0, 3636, 65535, 0, 3636, 65535, 0, 3636, 65535, 0, 3640, 65535, 0, 3640, 65535, 0, 3640, 65535, 0, 3640, 65535, 0, 3640, 65535, 0, 3640, 65535, 0, 3640, 65535, 0, 3640, 
65535, 0, 3640, 65535, 0, 3640, 65535, 0, 3640, 65535, 0, 3640, 65535, 0, 3640, 65535, 0, 3640, 65535, 0, 3640, 65535, 0, 3640, 65535, 0, 5056, 1431633920, 1398101, 5056, 1431633920, 1398101, 5056, 1431633920, 1398101, 5056, 1431633920, 1398101, 5056, 1431633920, 1398101, 5056, 1431633920, 1398101, 5056, 1431633920, 1398101, 5056, 1431633920, 1398101, 5056, 1431633920, 1398101, 5056, 1431633920, 1398101, 5056, 1431633920, 1398101, 5056, 1431633920, 1398101, 5056, 1431633920, 1398101, 5056, 1431633920, 1398101, 5056, 1431633920, 1398101, 5056, 1431633920, 1398101, 5056, 1431633920, 1398101, 5056, 1431633920, 1398101, 5056, 1431633920, 1398101, 7616, 1431633920, 1398101, 7616, 1431633920, 1398101, 7616, 1431633920, 1398101, 7616, 1431633920, 1398101, 7616, 1431633920, 1398101, 7616, 1431633920, 1398101, 7616, 1431633920, 1398101, 7616, 1431633920, 1398101, 7616, 1431633920, 1398101, 7616, 1431633920, 1398101, 7616, 1431633920, 1398101, 7616, 1431633920, 1398101, 7616, 1431633920, 1398101, 7616, 1431633920, 1398101, 7616, 1431633920, 1398101, 7616, 1431633920, 1398101, 7616, 1431633920, 1398101, 7616, 1431633920, 1398101, 7616, 1431633920, 1398101, 10816, 268435456, 0, 10832, 268435456, 0, 12352, 33556544, 16384, 12352, 33556544, 16384, 12352, 33556544, 16384, 12352, 33556544, 16384, 12368, 33556544, 16384, 12368, 33556544, 16384, 12368, 33556544, 16384, 12368, 33556544, 16384, 13568, 33554432, 1073741824, 13568, 33554432, 1073741824, 13572, 33554432, 1073741824, 13572, 33554432, 1073741824, 13576, 33554432, 1073741824, 13576, 33554432, 1073741824, 13584, 33554432, 1073741824, 13584, 33554432, 1073741824, 13588, 33554432, 1073741824, 13588, 33554432, 1073741824, 13592, 33554432, 1073741824, 13592, 33554432, 1073741824, 14784, 8, 34816, 14784, 8, 34816, 14784, 8, 34816, 14800, 8, 34816, 14800, 8, 34816, 14800, 8, 34816] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + 
GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448521428775910_574_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448521428775910_574_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6af73702 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448521428775910_574_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,328 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 39))) { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 17)) { + if ((WaveGetLaneIndex() >= 41)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 49))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 35)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (counter0 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((214 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((249 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((254 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((265 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 34)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((272 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (282 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 156 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2368, 32768, 0, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1984, 1431655765, 1431655765, 1728, 666282, 2852126720, 1728, 666282, 2852126720, 1728, 666282, 2852126720, 1728, 666282, 2852126720, 1728, 666282, 2852126720, 1728, 666282, 2852126720, 1728, 666282, 2852126720, 1728, 666282, 2852126720, 1728, 666282, 2852126720, 1728, 666282, 2852126720, 1728, 666282, 2852126720, 1728, 666282, 2852126720, 1728, 666282, 2852126720, 1472, 33554432, 0, 5568, 8388608, 536871040, 5568, 8388608, 536871040, 5568, 8388608, 536871040, 14736, 4, 0, 16272, 4, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: 
expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448522118923461_575_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448522118923461_575_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c267ed5e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448522118923461_575_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 135 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4480, 262656, 2147483656, 4480, 262656, 2147483656, 4480, 262656, 2147483656, 4480, 262656, 2147483656, 4224, 64, 33554688, 4224, 64, 33554688, 4224, 64, 33554688, 3968, 0, 2113928192, 3968, 0, 2113928192, 3968, 0, 2113928192, 3968, 0, 2113928192, 3968, 0, 2113928192, 3968, 0, 2113928192, 3968, 0, 2113928192, 3968, 0, 2113928192, 3968, 0, 2113928192, 3968, 0, 2113928192, 3968, 0, 2113928192, 3968, 0, 2113928192, 3968, 0, 2113928192, 3968, 0, 2113928192, 3968, 0, 2113928192, 3968, 0, 2113928192, 3968, 0, 2113928192, 3968, 0, 2113928192, 3968, 0, 2113928192, 3968, 0, 2113928192, 3584, 256, 0, 3200, 1431393301, 85, 3200, 1431393301, 85, 3200, 1431393301, 85, 3200, 1431393301, 85, 3200, 1431393301, 85, 3200, 1431393301, 85, 3200, 
1431393301, 85, 3200, 1431393301, 85, 3200, 1431393301, 85, 3200, 1431393301, 85, 3200, 1431393301, 85, 3200, 1431393301, 85, 3200, 1431393301, 85, 3200, 1431393301, 85, 3200, 1431393301, 85, 3200, 1431393301, 85, 3200, 1431393301, 85] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448522832587693_576_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448522832587693_576_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..69cd3c33 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448522832587693_576_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,397 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() >= 50)) { + if ((WaveGetLaneIndex() >= 37)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 62)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 47))) { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + 
} + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 51)) { + if ((WaveGetLaneIndex() == 56)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 38)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 58))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 44)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((249 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 57)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((256 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((277 << 6) | (counter4 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((296 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((WaveGetLaneIndex() == 38)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((303 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (308 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (315 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single 
dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 645 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1168, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 
1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1184, 1431655765, 1431655765, 1936, 0, 1431568384, 1936, 0, 1431568384, 1936, 0, 1431568384, 1936, 0, 1431568384, 1936, 0, 1431568384, 1936, 0, 1431568384, 1936, 0, 1431568384, 1952, 0, 1431568384, 1952, 0, 1431568384, 1952, 0, 1431568384, 1952, 0, 1431568384, 1952, 0, 1431568384, 1952, 0, 1431568384, 1952, 0, 1431568384, 2384, 0, 1073741824, 2400, 0, 1073741824, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2960, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 
2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 2976, 1431655765, 1431655765, 4224, 9, 0, 4224, 9, 0, 4240, 9, 0, 4240, 9, 0, 6544, 68174084, 1090785345, 6544, 68174084, 1090785345, 6544, 68174084, 1090785345, 6544, 68174084, 1090785345, 6544, 68174084, 1090785345, 6544, 68174084, 1090785345, 6544, 68174084, 1090785345, 6544, 68174084, 1090785345, 6544, 68174084, 1090785345, 6544, 68174084, 1090785345, 6544, 68174084, 1090785345, 10112, 65, 0, 10112, 65, 0, 11520, 272696336, 68174084, 11520, 272696336, 68174084, 11520, 272696336, 68174084, 11520, 272696336, 68174084, 11520, 272696336, 68174084, 11520, 272696336, 68174084, 11520, 272696336, 68174084, 11520, 272696336, 68174084, 11520, 272696336, 68174084, 11520, 272696336, 68174084, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 11840, 613566756, 1227133513, 12736, 17, 0, 12736, 17, 0, 19712, 1145324612, 1145324612, 19712, 1145324612, 1145324612, 19712, 1145324612, 1145324612, 19712, 1145324612, 1145324612, 19712, 1145324612, 1145324612, 19712, 1145324612, 1145324612, 19712, 1145324612, 1145324612, 19712, 1145324612, 1145324612, 19712, 1145324612, 1145324612, 19712, 1145324612, 1145324612, 19712, 
1145324612, 1145324612, 19712, 1145324612, 1145324612, 19712, 1145324612, 1145324612, 19712, 1145324612, 1145324612, 19712, 1145324612, 1145324612, 19712, 1145324612, 1145324612, 20160, 559240, 0, 20160, 559240, 0, 20160, 559240, 0, 20160, 559240, 0, 20160, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448637605003213_577_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448637605003213_577_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0dfcc1e2 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448637605003213_577_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,190 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 
1); + if ((WaveGetLaneIndex() == 61)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 59))) { + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 48)) { 
+ if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 45)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 63 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + 
Data: [576, 17, 0, 576, 17, 0, 1488, 0, 536870912, 1504, 0, 536870912, 2640, 8738, 0, 2640, 8738, 0, 2640, 8738, 0, 2640, 8738, 0, 2656, 8738, 0, 2656, 8738, 0, 2656, 8738, 0, 2656, 8738, 0, 3344, 2, 536870912, 3344, 2, 536870912, 3360, 2, 536870912, 3360, 2, 536870912, 7680, 559240, 0, 7680, 559240, 0, 7680, 559240, 0, 7680, 559240, 0, 7680, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448639415756466_578_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448639415756466_578_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0c3b8c32 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448639415756466_578_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,280 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } 
+ case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 
6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 49))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 46)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 33)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 32))) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 55)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((261 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 36)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 6720, 572662306, 572662306, 6720, 572662306, 572662306, 6720, 572662306, 572662306, 6720, 572662306, 572662306, 6720, 572662306, 572662306, 6720, 572662306, 572662306, 6720, 572662306, 572662306, 6720, 572662306, 572662306, 6720, 572662306, 572662306, 6720, 572662306, 572662306, 6720, 572662306, 572662306, 6720, 572662306, 572662306, 6720, 572662306, 572662306, 6720, 572662306, 572662306, 6720, 572662306, 572662306, 6720, 572662306, 572662306, 6736, 572662306, 572662306, 6736, 572662306, 572662306, 6736, 572662306, 572662306, 6736, 572662306, 572662306, 6736, 572662306, 572662306, 6736, 572662306, 572662306, 6736, 572662306, 572662306, 6736, 572662306, 572662306, 6736, 572662306, 572662306, 6736, 572662306, 572662306, 6736, 572662306, 572662306, 6736, 572662306, 572662306, 6736, 572662306, 572662306, 6736, 
572662306, 572662306, 6736, 572662306, 572662306, 6736, 572662306, 572662306, 8768, 64, 0, 8784, 64, 0, 9600, 0, 4194304, 9616, 0, 4194304, 11008, 4195328, 67125252, 11008, 4195328, 67125252, 11008, 4195328, 67125252, 11008, 4195328, 67125252, 11008, 4195328, 67125252, 15104, 0, 1073741824, 15552, 16388, 0, 15552, 16388, 0, 16720, 4, 0, 16736, 4, 0, 16752, 4, 0, 17600, 559240, 0, 17600, 559240, 0, 17600, 559240, 0, 17600, 559240, 0, 17600, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448645929330028_579_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448645929330028_579_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cb9ac819 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448645929330028_579_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,347 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 55)) { + if ((WaveGetLaneIndex() == 53)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 22)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 18)) { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 58))) { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 37)) { + if ((WaveGetLaneIndex() < 19)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 53))) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 56)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 46))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if 
(((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 15))) { + if ((((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (246 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (282 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 56))) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 49)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((311 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((318 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 312 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6800, 73, 0, 6800, 73, 0, 6800, 73, 0, 6816, 73, 0, 6816, 73, 0, 6816, 73, 0, 7952, 1, 2453667840, 7952, 1, 2453667840, 7952, 1, 2453667840, 7952, 1, 2453667840, 7952, 1, 2453667840, 7968, 1, 2453667840, 7968, 1, 2453667840, 7968, 1, 2453667840, 7968, 1, 2453667840, 7968, 1, 2453667840, 8400, 0, 2449473536, 8400, 0, 2449473536, 8400, 0, 2449473536, 8416, 0, 2449473536, 8416, 0, 2449473536, 8416, 0, 2449473536, 8960, 272696336, 68174084, 8960, 272696336, 68174084, 8960, 272696336, 68174084, 8960, 272696336, 68174084, 8960, 272696336, 68174084, 8960, 272696336, 68174084, 8960, 272696336, 68174084, 8960, 272696336, 68174084, 8960, 272696336, 68174084, 8960, 272696336, 
68174084, 19908, 0, 4278190080, 19908, 0, 4278190080, 19908, 0, 4278190080, 19908, 0, 4278190080, 19908, 0, 4278190080, 19908, 0, 4278190080, 19908, 0, 4278190080, 19908, 0, 4278190080, 19912, 0, 4278190080, 19912, 0, 4278190080, 19912, 0, 4278190080, 19912, 0, 4278190080, 19912, 0, 4278190080, 19912, 0, 4278190080, 19912, 0, 4278190080, 19912, 0, 4278190080, 19924, 0, 4278190080, 19924, 0, 4278190080, 19924, 0, 4278190080, 19924, 0, 4278190080, 19924, 0, 4278190080, 19924, 0, 4278190080, 19924, 0, 4278190080, 19924, 0, 4278190080, 19928, 0, 4278190080, 19928, 0, 4278190080, 19928, 0, 4278190080, 19928, 0, 4278190080, 19928, 0, 4278190080, 19928, 0, 4278190080, 19928, 0, 4278190080, 19928, 0, 4278190080, 19940, 0, 4278190080, 19940, 0, 4278190080, 19940, 0, 4278190080, 19940, 0, 4278190080, 19940, 0, 4278190080, 19940, 0, 4278190080, 19940, 0, 4278190080, 19940, 0, 4278190080, 19944, 0, 4278190080, 19944, 0, 4278190080, 19944, 0, 4278190080, 19944, 0, 4278190080, 19944, 0, 4278190080, 19944, 0, 4278190080, 19944, 0, 4278190080, 19944, 0, 4278190080, 20356, 15, 0, 20356, 15, 0, 20356, 15, 0, 20356, 15, 0, 20360, 15, 0, 20360, 15, 0, 20360, 15, 0, 20360, 15, 0, 20372, 15, 0, 20372, 15, 0, 20372, 15, 0, 20372, 15, 0, 20376, 15, 0, 20376, 15, 0, 20376, 15, 0, 20376, 15, 0, 20388, 15, 0, 20388, 15, 0, 20388, 15, 0, 20388, 15, 0, 20392, 15, 0, 20392, 15, 0, 20392, 15, 0, 20392, 15, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448663895376444_580_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448663895376444_580_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4747026e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448663895376444_580_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,227 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 22))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 40))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 57))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 40)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + 
result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 399 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5696, 73, 0, 5696, 73, 0, 5696, 73, 0, 6272, 272696336, 68174084, 6272, 272696336, 68174084, 6272, 272696336, 68174084, 6272, 272696336, 68174084, 6272, 272696336, 68174084, 6272, 272696336, 68174084, 6272, 272696336, 68174084, 6272, 272696336, 68174084, 6272, 272696336, 68174084, 6272, 272696336, 68174084, 11328, 68174084, 1090785345, 11328, 68174084, 1090785345, 11328, 68174084, 1090785345, 11328, 68174084, 1090785345, 11328, 68174084, 1090785345, 11328, 68174084, 1090785345, 11328, 68174084, 1090785345, 11328, 68174084, 1090785345, 11328, 68174084, 1090785345, 11328, 68174084, 1090785345, 11328, 68174084, 1090785345, 11344, 68174084, 1090785345, 11344, 68174084, 1090785345, 11344, 68174084, 1090785345, 11344, 68174084, 1090785345, 11344, 68174084, 1090785345, 11344, 68174084, 1090785345, 11344, 68174084, 1090785345, 11344, 68174084, 1090785345, 11344, 68174084, 1090785345, 11344, 68174084, 1090785345, 11344, 68174084, 1090785345, 11360, 68174084, 1090785345, 11360, 68174084, 1090785345, 11360, 68174084, 1090785345, 11360, 68174084, 1090785345, 11360, 68174084, 1090785345, 11360, 68174084, 1090785345, 11360, 68174084, 1090785345, 11360, 68174084, 1090785345, 11360, 68174084, 1090785345, 11360, 68174084, 1090785345, 11360, 68174084, 1090785345, 11904, 68174084, 1090785345, 11904, 68174084, 1090785345, 11904, 68174084, 1090785345, 11904, 68174084, 1090785345, 11904, 
68174084, 1090785345, 11904, 68174084, 1090785345, 11904, 68174084, 1090785345, 11904, 68174084, 1090785345, 11904, 68174084, 1090785345, 11904, 68174084, 1090785345, 11904, 68174084, 1090785345, 11920, 68174084, 1090785345, 11920, 68174084, 1090785345, 11920, 68174084, 1090785345, 11920, 68174084, 1090785345, 11920, 68174084, 1090785345, 11920, 68174084, 1090785345, 11920, 68174084, 1090785345, 11920, 68174084, 1090785345, 11920, 68174084, 1090785345, 11920, 68174084, 1090785345, 11920, 68174084, 1090785345, 11936, 68174084, 1090785345, 11936, 68174084, 1090785345, 11936, 68174084, 1090785345, 11936, 68174084, 1090785345, 11936, 68174084, 1090785345, 11936, 68174084, 1090785345, 11936, 68174084, 1090785345, 11936, 68174084, 1090785345, 11936, 68174084, 1090785345, 11936, 68174084, 1090785345, 11936, 68174084, 1090785345, 576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 5056, 4194304, 8388864, 5056, 4194304, 8388864, 5056, 4194304, 8388864, 4800, 524288, 131072, 4800, 524288, 131072, 4416, 4095, 0, 4416, 4095, 0, 4416, 4095, 0, 4416, 4095, 0, 4416, 4095, 0, 4416, 4095, 0, 4416, 4095, 0, 4416, 4095, 0, 4416, 4095, 0, 
4416, 4095, 0, 4416, 4095, 0, 4416, 4095, 0, 4160, 1048576, 2147484672, 4160, 1048576, 2147484672, 4160, 1048576, 2147484672] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448667410801038_581_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448667410801038_581_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..13942961 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448667410801038_581_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,211 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 7))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 
2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + break; + } + } + } else { + if ((WaveGetLaneIndex() == 37)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 15)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 25)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4608, 0, 32, 5440, 73, 0, 5440, 73, 0, 5440, 73, 0, 6016, 5201, 0, 6016, 5201, 0, 6016, 5201, 0, 6016, 5201, 0, 6016, 5201, 0, 6336, 32639, 0, 6336, 32639, 0, 6336, 32639, 0, 6336, 32639, 0, 6336, 32639, 0, 6336, 32639, 0, 6336, 32639, 0, 6336, 32639, 0, 6336, 32639, 0, 6336, 32639, 0, 6336, 32639, 0, 6336, 32639, 0, 6336, 32639, 0, 6336, 32639, 0, 7040, 32639, 0, 7040, 32639, 0, 7040, 32639, 0, 7040, 32639, 0, 7040, 32639, 0, 7040, 32639, 0, 7040, 32639, 0, 7040, 32639, 0, 7040, 32639, 0, 7040, 32639, 0, 7040, 32639, 0, 7040, 32639, 0, 7040, 32639, 0, 7040, 32639, 0, 7680, 17, 0, 7680, 17, 0, 8576, 1145324612, 1145324612, 8576, 1145324612, 1145324612, 8576, 1145324612, 1145324612, 8576, 1145324612, 1145324612, 8576, 1145324612, 1145324612, 8576, 1145324612, 1145324612, 8576, 1145324612, 1145324612, 8576, 1145324612, 1145324612, 8576, 1145324612, 1145324612, 8576, 1145324612, 1145324612, 8576, 1145324612, 1145324612, 8576, 1145324612, 1145324612, 8576, 1145324612, 1145324612, 8576, 1145324612, 1145324612, 8576, 1145324612, 1145324612, 8576, 1145324612, 1145324612, 9024, 559240, 0, 9024, 559240, 0, 9024, 559240, 0, 9024, 559240, 0, 9024, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448669410864722_582_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448669410864722_582_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6c76eb42 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448669410864722_582_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,144 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 45)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 54)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((38 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 44)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((91 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 327 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2000, 0, 
613564416, 2000, 0, 613564416, 2000, 0, 613564416, 2000, 0, 613564416, 2000, 0, 613564416, 2000, 0, 613564416, 2004, 0, 613564416, 2004, 0, 613564416, 2004, 0, 613564416, 2004, 0, 613564416, 2004, 0, 613564416, 2004, 0, 613564416, 2016, 0, 613564416, 2016, 0, 613564416, 2016, 0, 613564416, 2016, 0, 613564416, 2016, 0, 613564416, 2016, 0, 613564416, 2020, 0, 613564416, 2020, 0, 613564416, 2020, 0, 613564416, 2020, 0, 613564416, 2020, 0, 613564416, 2020, 0, 613564416, 2448, 0, 612368384, 2448, 0, 612368384, 2448, 0, 612368384, 2452, 0, 612368384, 2452, 0, 612368384, 2452, 0, 612368384, 2464, 0, 612368384, 2464, 0, 612368384, 2464, 0, 612368384, 2468, 0, 612368384, 2468, 0, 612368384, 2468, 0, 612368384, 3152, 18, 0, 3152, 18, 0, 3168, 18, 0, 3168, 18, 0, 4096, 0, 1840697344, 4096, 0, 1840697344, 4096, 0, 1840697344, 4096, 0, 1840697344, 4096, 0, 1840697344, 4096, 0, 1840697344, 4096, 0, 1840697344, 4096, 0, 1840697344, 4096, 0, 1840697344, 4096, 0, 1840697344, 4096, 0, 1840697344, 4096, 0, 1840697344, 4096, 0, 1840697344, 4112, 0, 1840697344, 4112, 0, 1840697344, 4112, 0, 1840697344, 4112, 0, 1840697344, 4112, 0, 1840697344, 4112, 0, 1840697344, 4112, 0, 1840697344, 4112, 0, 1840697344, 4112, 0, 1840697344, 4112, 0, 1840697344, 4112, 0, 1840697344, 4112, 0, 1840697344, 4112, 0, 1840697344, 4128, 0, 1840697344, 4128, 0, 1840697344, 4128, 0, 1840697344, 4128, 0, 1840697344, 4128, 0, 1840697344, 4128, 0, 1840697344, 4128, 0, 1840697344, 4128, 0, 1840697344, 4128, 0, 1840697344, 4128, 0, 1840697344, 4128, 0, 1840697344, 4128, 0, 1840697344, 4128, 0, 1840697344, 5824, 65552, 16384, 5824, 65552, 16384, 5824, 65552, 16384, 5828, 65552, 16384, 5828, 65552, 16384, 5828, 65552, 16384, 5832, 65552, 16384, 5832, 65552, 16384, 5832, 65552, 16384, 5840, 65552, 16384, 5840, 65552, 16384, 5840, 65552, 16384, 5844, 65552, 16384, 5844, 65552, 16384, 5844, 65552, 16384, 5848, 65552, 16384, 5848, 65552, 16384, 5848, 65552, 16384, 5856, 65552, 16384, 5856, 65552, 16384, 5856, 65552, 
16384, 5860, 65552, 16384, 5860, 65552, 16384, 5860, 65552, 16384, 5864, 65552, 16384, 5864, 65552, 16384, 5864, 65552, 16384] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448708718582441_583_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448708718582441_583_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9cc4ead9 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448708718582441_583_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,212 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 60))) { + if ((((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 12) || 
(WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 43))) { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 61)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 59))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((178 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((197 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 141 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6144, 1, 2415919104, 6144, 1, 2415919104, 6144, 1, 2415919104, 6784, 73, 0, 6784, 73, 0, 6784, 73, 0, 8512, 585, 2415919104, 8512, 585, 2415919104, 8512, 585, 2415919104, 8512, 585, 2415919104, 8512, 585, 2415919104, 8512, 585, 2415919104, 9728, 0, 67108864, 9744, 0, 67108864, 11396, 1024, 0, 11400, 1024, 0, 11412, 1024, 0, 11416, 1024, 0, 12612, 4194304, 131072, 12612, 4194304, 131072, 12616, 4194304, 131072, 12616, 4194304, 131072, 12628, 4194304, 131072, 12628, 4194304, 131072, 12632, 4194304, 131072, 12632, 4194304, 131072, 13632, 613566756, 1227133513, 13632, 613566756, 1227133513, 13632, 613566756, 1227133513, 13632, 613566756, 1227133513, 13632, 613566756, 1227133513, 13632, 613566756, 1227133513, 13632, 613566756, 1227133513, 13632, 613566756, 1227133513, 13632, 613566756, 1227133513, 13632, 613566756, 1227133513, 13632, 613566756, 1227133513, 13632, 613566756, 1227133513, 
13632, 613566756, 1227133513, 13632, 613566756, 1227133513, 13632, 613566756, 1227133513, 13632, 613566756, 1227133513, 13632, 613566756, 1227133513, 13632, 613566756, 1227133513, 13632, 613566756, 1227133513, 13632, 613566756, 1227133513, 13632, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448713578444500_584_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448713578444500_584_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cdea3704 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448713578444500_584_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,117 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + 
WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1920, 
559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448713933475941_585_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448713933475941_585_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b210e644 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448713933475941_585_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,241 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 9))) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 32)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 57)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 48)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 34))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 52))) { + result = 
(result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 62))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 2048, 0, 2560, 1, 0, 4112, 0, 33554432, 4128, 0, 33554432, 4144, 0, 33554432, 4560, 16384, 0, 4576, 16384, 0, 4592, 16384, 0, 5008, 0, 65536, 5024, 0, 65536, 5040, 0, 65536, 7360, 0, 1048576, 8576, 
256, 0, 9792, 73, 0, 9792, 73, 0, 9792, 73, 0, 10368, 272696336, 68174084, 10368, 272696336, 68174084, 10368, 272696336, 68174084, 10368, 272696336, 68174084, 10368, 272696336, 68174084, 10368, 272696336, 68174084, 10368, 272696336, 68174084, 10368, 272696336, 68174084, 10368, 272696336, 68174084, 10368, 272696336, 68174084] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448746992170745_586_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448746992170745_586_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..df22584f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448746992170745_586_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,259 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if ((WaveGetLaneIndex() == 16)) { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 36))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 48)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + 
result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 54)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 387 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2048, 17, 0, 2048, 17, 0, 4352, 1145324612, 1145324612, 4352, 1145324612, 1145324612, 4352, 1145324612, 1145324612, 4352, 1145324612, 1145324612, 4352, 1145324612, 1145324612, 4352, 1145324612, 1145324612, 4352, 1145324612, 1145324612, 4352, 1145324612, 1145324612, 4352, 1145324612, 1145324612, 4352, 1145324612, 1145324612, 4352, 1145324612, 1145324612, 4352, 1145324612, 1145324612, 4352, 1145324612, 1145324612, 4352, 1145324612, 1145324612, 4352, 1145324612, 1145324612, 4352, 1145324612, 1145324612, 7360, 2290649224, 2290649224, 7360, 2290649224, 2290649224, 7360, 2290649224, 2290649224, 7360, 2290649224, 2290649224, 7360, 2290649224, 2290649224, 7360, 2290649224, 2290649224, 7360, 2290649224, 2290649224, 7360, 2290649224, 2290649224, 7360, 2290649224, 2290649224, 7360, 2290649224, 2290649224, 7360, 2290649224, 2290649224, 7360, 2290649224, 2290649224, 7360, 2290649224, 2290649224, 7360, 2290649224, 2290649224, 7360, 2290649224, 2290649224, 7360, 2290649224, 2290649224, 8448, 2290649224, 2290649224, 8448, 2290649224, 2290649224, 8448, 2290649224, 2290649224, 8448, 2290649224, 2290649224, 8448, 2290649224, 2290649224, 8448, 2290649224, 2290649224, 8448, 2290649224, 2290649224, 8448, 2290649224, 2290649224, 8448, 2290649224, 2290649224, 8448, 2290649224, 2290649224, 8448, 2290649224, 2290649224, 8448, 2290649224, 2290649224, 8448, 2290649224, 2290649224, 8448, 2290649224, 2290649224, 8448, 2290649224, 2290649224, 8448, 2290649224, 2290649224, 8464, 
2290649224, 2290649224, 8464, 2290649224, 2290649224, 8464, 2290649224, 2290649224, 8464, 2290649224, 2290649224, 8464, 2290649224, 2290649224, 8464, 2290649224, 2290649224, 8464, 2290649224, 2290649224, 8464, 2290649224, 2290649224, 8464, 2290649224, 2290649224, 8464, 2290649224, 2290649224, 8464, 2290649224, 2290649224, 8464, 2290649224, 2290649224, 8464, 2290649224, 2290649224, 8464, 2290649224, 2290649224, 8464, 2290649224, 2290649224, 8464, 2290649224, 2290649224, 9152, 2290649224, 2290649224, 9152, 2290649224, 2290649224, 9152, 2290649224, 2290649224, 9152, 2290649224, 2290649224, 9152, 2290649224, 2290649224, 9152, 2290649224, 2290649224, 9152, 2290649224, 2290649224, 9152, 2290649224, 2290649224, 9152, 2290649224, 2290649224, 9152, 2290649224, 2290649224, 9152, 2290649224, 2290649224, 9152, 2290649224, 2290649224, 9152, 2290649224, 2290649224, 9152, 2290649224, 2290649224, 9152, 2290649224, 2290649224, 9152, 2290649224, 2290649224, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13824, 1431655765, 1431655765, 13568, 33554432, 131072, 13568, 33554432, 131072, 
13312, 131072, 8, 13312, 131072, 8, 13056, 32, 128, 13056, 32, 128, 12800, 0, 2860515328, 12800, 0, 2860515328, 12800, 0, 2860515328, 12800, 0, 2860515328, 12800, 0, 2860515328] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448800417467084_588_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448800417467084_588_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8e6555b1 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448800417467084_588_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,251 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 50)) { + if ((WaveGetLaneIndex() >= 33)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 34)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 44)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 36)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 23)) { + if ((WaveGetLaneIndex() < 22)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() >= 61)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 60))) { + 
switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 456 + - Name: expected_bit_patterns + Format: UInt32 + 
Stride: 4 + Data: [896, 0, 2454192128, 896, 0, 2454192128, 896, 0, 2454192128, 896, 0, 2454192128, 896, 0, 2454192128, 1344, 0, 2454192128, 1344, 0, 2454192128, 1344, 0, 2454192128, 1344, 0, 2454192128, 1344, 0, 2454192128, 2880, 1090785345, 66576, 2880, 1090785345, 66576, 2880, 1090785345, 66576, 2880, 1090785345, 66576, 2880, 1090785345, 66576, 2880, 1090785345, 66576, 2880, 1090785345, 66576, 2880, 1090785345, 66576, 2880, 1090785345, 66576, 2896, 1090785345, 66576, 2896, 1090785345, 66576, 2896, 1090785345, 66576, 2896, 1090785345, 66576, 2896, 1090785345, 66576, 2896, 1090785345, 66576, 2896, 1090785345, 66576, 2896, 1090785345, 66576, 2896, 1090785345, 66576, 2912, 1090785345, 66576, 2912, 1090785345, 66576, 2912, 1090785345, 66576, 2912, 1090785345, 66576, 2912, 1090785345, 66576, 2912, 1090785345, 66576, 2912, 1090785345, 66576, 2912, 1090785345, 66576, 2912, 1090785345, 66576, 3908, 136348168, 8322, 3908, 136348168, 8322, 3908, 136348168, 8322, 3908, 136348168, 8322, 3908, 136348168, 8322, 3908, 136348168, 8322, 3908, 136348168, 8322, 3908, 136348168, 8322, 3912, 136348168, 8322, 3912, 136348168, 8322, 3912, 136348168, 8322, 3912, 136348168, 8322, 3912, 136348168, 8322, 3912, 136348168, 8322, 3912, 136348168, 8322, 3912, 136348168, 8322, 3916, 136348168, 8322, 3916, 136348168, 8322, 3916, 136348168, 8322, 3916, 136348168, 8322, 3916, 136348168, 8322, 3916, 136348168, 8322, 3916, 136348168, 8322, 3916, 136348168, 8322, 3924, 136348168, 8322, 3924, 136348168, 8322, 3924, 136348168, 8322, 3924, 136348168, 8322, 3924, 136348168, 8322, 3924, 136348168, 8322, 3924, 136348168, 8322, 3924, 136348168, 8322, 3928, 136348168, 8322, 3928, 136348168, 8322, 3928, 136348168, 8322, 3928, 136348168, 8322, 3928, 136348168, 8322, 3928, 136348168, 8322, 3928, 136348168, 8322, 3928, 136348168, 8322, 3932, 136348168, 8322, 3932, 136348168, 8322, 3932, 136348168, 8322, 3932, 136348168, 8322, 3932, 136348168, 8322, 3932, 136348168, 8322, 3932, 136348168, 8322, 3932, 136348168, 
8322, 3940, 136348168, 8322, 3940, 136348168, 8322, 3940, 136348168, 8322, 3940, 136348168, 8322, 3940, 136348168, 8322, 3940, 136348168, 8322, 3940, 136348168, 8322, 3940, 136348168, 8322, 3944, 136348168, 8322, 3944, 136348168, 8322, 3944, 136348168, 8322, 3944, 136348168, 8322, 3944, 136348168, 8322, 3944, 136348168, 8322, 3944, 136348168, 8322, 3944, 136348168, 8322, 3948, 136348168, 8322, 3948, 136348168, 8322, 3948, 136348168, 8322, 3948, 136348168, 8322, 3948, 136348168, 8322, 3948, 136348168, 8322, 3948, 136348168, 8322, 3948, 136348168, 8322, 4352, 0, 16, 4992, 599186, 0, 4992, 599186, 0, 4992, 599186, 0, 4992, 599186, 0, 4992, 599186, 0, 4992, 599186, 0, 4992, 599186, 0, 6400, 4260880, 0, 6400, 4260880, 0, 6400, 4260880, 0, 6400, 4260880, 0, 6416, 4260880, 0, 6416, 4260880, 0, 6416, 4260880, 0, 6416, 4260880, 0, 6432, 4260880, 0, 6432, 4260880, 0, 6432, 4260880, 0, 6432, 4260880, 0, 9664, 0, 2048, 12032, 0, 536870912, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513, 13056, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448820075277675_590_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448820075277675_590_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..04a8f632 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448820075277675_590_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,358 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 56))) { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 51))) { + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() 
== 50))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 57)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 57))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 57))) { + result = (result + 
WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (267 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 19)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((282 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((305 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 53))) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((329 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((344 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ } else { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((357 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((376 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 294 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 8000, 537002016, 2097664, 8000, 537002016, 2097664, 8000, 537002016, 2097664, 8000, 537002016, 2097664, 8000, 537002016, 2097664, 15936, 1145323588, 1145324612, 15936, 1145323588, 1145324612, 15936, 1145323588, 1145324612, 15936, 1145323588, 1145324612, 15936, 1145323588, 1145324612, 15936, 1145323588, 1145324612, 15936, 1145323588, 1145324612, 15936, 1145323588, 1145324612, 15936, 1145323588, 1145324612, 15936, 1145323588, 1145324612, 15936, 1145323588, 1145324612, 15936, 1145323588, 1145324612, 15936, 1145323588, 1145324612, 15936, 1145323588, 1145324612, 15936, 1145323588, 1145324612, 17088, 0, 1140850688, 17088, 0, 1140850688, 18048, 34952, 0, 18048, 34952, 0, 18048, 34952, 0, 18048, 34952, 0, 18064, 34952, 0, 18064, 34952, 0, 18064, 34952, 0, 18064, 34952, 0, 
19520, 0, 524288, 19524, 0, 524288, 19528, 0, 524288, 19536, 0, 524288, 19540, 0, 524288, 19544, 0, 524288, 22848, 2184, 2290649088, 22848, 2184, 2290649088, 22848, 2184, 2290649088, 22848, 2184, 2290649088, 22848, 2184, 2290649088, 22848, 2184, 2290649088, 22848, 2184, 2290649088, 22848, 2184, 2290649088, 22848, 2184, 2290649088, 22852, 2184, 2290649088, 22852, 2184, 2290649088, 22852, 2184, 2290649088, 22852, 2184, 2290649088, 22852, 2184, 2290649088, 22852, 2184, 2290649088, 22852, 2184, 2290649088, 22852, 2184, 2290649088, 22852, 2184, 2290649088, 22856, 2184, 2290649088, 22856, 2184, 2290649088, 22856, 2184, 2290649088, 22856, 2184, 2290649088, 22856, 2184, 2290649088, 22856, 2184, 2290649088, 22856, 2184, 2290649088, 22856, 2184, 2290649088, 22856, 2184, 2290649088, 22864, 2184, 2290649088, 22864, 2184, 2290649088, 22864, 2184, 2290649088, 22864, 2184, 2290649088, 22864, 2184, 2290649088, 22864, 2184, 2290649088, 22864, 2184, 2290649088, 22864, 2184, 2290649088, 22864, 2184, 2290649088, 22868, 2184, 2290649088, 22868, 2184, 2290649088, 22868, 2184, 2290649088, 22868, 2184, 2290649088, 22868, 2184, 2290649088, 22868, 2184, 2290649088, 22868, 2184, 2290649088, 22868, 2184, 2290649088, 22868, 2184, 2290649088, 22872, 2184, 2290649088, 22872, 2184, 2290649088, 22872, 2184, 2290649088, 22872, 2184, 2290649088, 22872, 2184, 2290649088, 22872, 2184, 2290649088, 22872, 2184, 2290649088, 22872, 2184, 2290649088, 22872, 2184, 2290649088, 24064, 134217728, 0, 24068, 134217728, 0, 24072, 134217728, 0, 24080, 134217728, 0, 24084, 134217728, 0, 24088, 134217728, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448830746045590_591_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448830746045590_591_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..53d44b92 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448830746045590_591_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,392 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() < 22)) { + if ((WaveGetLaneIndex() < 19)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 28)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 62)) { + if ((WaveGetLaneIndex() == 48)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 63))) { + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 32)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 43)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (252 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 62)) { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 30))) { + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((310 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((327 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 1)) { + continue; + } + } + } else { + if 
(((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (351 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 10)) { + if ((WaveGetLaneIndex() >= 37)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (361 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 47))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (399 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (422 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 46)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (431 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (450 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 558 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1216, 209715, 0, 1216, 209715, 0, 1216, 209715, 0, 1216, 209715, 0, 1216, 209715, 0, 1216, 209715, 0, 1216, 209715, 0, 1216, 209715, 0, 1216, 209715, 0, 1216, 209715, 0, 2256, 3355443, 0, 2256, 3355443, 0, 2256, 3355443, 0, 2256, 3355443, 0, 2256, 3355443, 0, 2256, 3355443, 0, 2256, 3355443, 0, 2256, 3355443, 0, 2256, 3355443, 0, 2256, 3355443, 0, 2256, 3355443, 0, 2256, 3355443, 0, 2272, 3355443, 0, 2272, 3355443, 0, 2272, 3355443, 0, 2272, 3355443, 0, 2272, 3355443, 0, 2272, 3355443, 0, 2272, 3355443, 0, 2272, 3355443, 0, 2272, 3355443, 0, 2272, 3355443, 0, 2272, 3355443, 0, 2272, 3355443, 0, 2288, 3355443, 0, 2288, 3355443, 0, 2288, 3355443, 0, 2288, 3355443, 0, 2288, 3355443, 0, 2288, 3355443, 0, 2288, 3355443, 0, 2288, 3355443, 0, 2288, 3355443, 0, 2288, 3355443, 0, 2288, 3355443, 0, 2288, 3355443, 0, 3600, 819, 0, 3600, 819, 0, 3600, 819, 0, 3600, 819, 0, 3600, 819, 0, 3600, 819, 0, 3616, 819, 0, 3616, 819, 0, 3616, 819, 0, 3616, 819, 0, 3616, 819, 0, 3616, 819, 0, 3632, 819, 0, 3632, 819, 0, 3632, 819, 0, 3632, 819, 0, 3632, 819, 0, 3632, 819, 0, 5120, 268435456, 1048832, 5120, 268435456, 1048832, 5120, 268435456, 1048832, 6336, 0, 
556924928, 6336, 0, 556924928, 6336, 0, 556924928, 6336, 0, 556924928, 6336, 0, 556924928, 6352, 0, 556924928, 6352, 0, 556924928, 6352, 0, 556924928, 6352, 0, 556924928, 6352, 0, 556924928, 9920, 1073741824, 0, 9936, 1073741824, 0, 9952, 1073741824, 0, 10624, 64, 0, 10640, 64, 0, 10656, 64, 0, 11072, 0, 306258962, 11072, 0, 306258962, 11072, 0, 306258962, 11072, 0, 306258962, 11072, 0, 306258962, 11072, 0, 306258962, 11072, 0, 306258962, 11072, 0, 306258962, 11088, 0, 306258962, 11088, 0, 306258962, 11088, 0, 306258962, 11088, 0, 306258962, 11088, 0, 306258962, 11088, 0, 306258962, 11088, 0, 306258962, 11088, 0, 306258962, 11104, 0, 306258962, 11104, 0, 306258962, 11104, 0, 306258962, 11104, 0, 306258962, 11104, 0, 306258962, 11104, 0, 306258962, 11104, 0, 306258962, 11104, 0, 306258962, 11520, 0, 306257920, 11520, 0, 306257920, 11520, 0, 306257920, 11520, 0, 306257920, 11520, 0, 306257920, 11536, 0, 306257920, 11536, 0, 306257920, 11536, 0, 306257920, 11536, 0, 306257920, 11536, 0, 306257920, 11552, 0, 306257920, 11552, 0, 306257920, 11552, 0, 306257920, 11552, 0, 306257920, 11552, 0, 306257920, 11968, 0, 4194304, 11984, 0, 4194304, 12000, 0, 4194304, 12544, 272696336, 68174084, 12544, 272696336, 68174084, 12544, 272696336, 68174084, 12544, 272696336, 68174084, 12544, 272696336, 68174084, 12544, 272696336, 68174084, 12544, 272696336, 68174084, 12544, 272696336, 68174084, 12544, 272696336, 68174084, 12544, 272696336, 68174084, 14224, 0, 64, 14848, 559240, 0, 14848, 559240, 0, 14848, 559240, 0, 14848, 559240, 0, 14848, 559240, 0, 16512, 2147491840, 8192, 16512, 2147491840, 8192, 16512, 2147491840, 8192, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 
1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 16128, 1431655765, 1431655765, 22464, 528384, 526336, 22464, 528384, 526336, 22464, 528384, 526336, 22464, 528384, 526336, 28800, 262148, 4194560, 28800, 262148, 4194560, 28800, 262148, 4194560, 28800, 262148, 4194560] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448893080189569_592_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448893080189569_592_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..706c0d24 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448893080189569_592_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,199 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 6)) { + if 
((WaveGetLaneIndex() >= 62)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 73, 0, 768, 73, 0, 768, 73, 0, 3856, 16, 0, 3872, 16, 0, 3888, 16, 0, 4432, 16, 0, 4448, 16, 0, 4464, 16, 0, 4864, 65536, 0, 6464, 272696320, 68174084, 6464, 272696320, 68174084, 6464, 272696320, 68174084, 6464, 272696320, 68174084, 6464, 272696320, 68174084, 6464, 272696320, 68174084, 6464, 272696320, 68174084, 6464, 272696320, 68174084, 6464, 272696320, 68174084, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 
7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513, 7360, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756448894070738635_593_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756448894070738635_593_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9181a2da --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756448894070738635_593_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,341 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 56)) { + if ((WaveGetLaneIndex() == 59)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 6))) { + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 
0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((115 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 58)) { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((125 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((132 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((141 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((150 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((157 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (i3 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 2)) { + break; + } + } + } + case 1: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 56))) { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((264 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((275 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + if ((WaveGetLaneIndex() == 34)) { + if ((WaveGetLaneIndex() == 55)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (285 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((303 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 22)) { + if ((WaveGetLaneIndex() < 18)) { + result = 
(result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((313 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((320 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((331 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (338 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 990 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5952, 272696336, 68174084, 5952, 272696336, 68174084, 5952, 272696336, 68174084, 5952, 272696336, 68174084, 5952, 272696336, 68174084, 5952, 272696336, 68174084, 5952, 272696336, 68174084, 5952, 272696336, 68174084, 5952, 272696336, 68174084, 5952, 272696336, 68174084, 9040, 68174084, 1090785345, 9040, 68174084, 1090785345, 9040, 68174084, 1090785345, 9040, 68174084, 1090785345, 9040, 68174084, 1090785345, 9040, 
68174084, 1090785345, 9040, 68174084, 1090785345, 9040, 68174084, 1090785345, 9040, 68174084, 1090785345, 9040, 68174084, 1090785345, 9040, 68174084, 1090785345, 9044, 68174084, 1090785345, 9044, 68174084, 1090785345, 9044, 68174084, 1090785345, 9044, 68174084, 1090785345, 9044, 68174084, 1090785345, 9044, 68174084, 1090785345, 9044, 68174084, 1090785345, 9044, 68174084, 1090785345, 9044, 68174084, 1090785345, 9044, 68174084, 1090785345, 9044, 68174084, 1090785345, 9056, 68174084, 1090785345, 9056, 68174084, 1090785345, 9056, 68174084, 1090785345, 9056, 68174084, 1090785345, 9056, 68174084, 1090785345, 9056, 68174084, 1090785345, 9056, 68174084, 1090785345, 9056, 68174084, 1090785345, 9056, 68174084, 1090785345, 9056, 68174084, 1090785345, 9056, 68174084, 1090785345, 9060, 68174084, 1090785345, 9060, 68174084, 1090785345, 9060, 68174084, 1090785345, 9060, 68174084, 1090785345, 9060, 68174084, 1090785345, 9060, 68174084, 1090785345, 9060, 68174084, 1090785345, 9060, 68174084, 1090785345, 9060, 68174084, 1090785345, 9060, 68174084, 1090785345, 9060, 68174084, 1090785345, 9072, 68174084, 1090785345, 9072, 68174084, 1090785345, 9072, 68174084, 1090785345, 9072, 68174084, 1090785345, 9072, 68174084, 1090785345, 9072, 68174084, 1090785345, 9072, 68174084, 1090785345, 9072, 68174084, 1090785345, 9072, 68174084, 1090785345, 9072, 68174084, 1090785345, 9072, 68174084, 1090785345, 9076, 68174084, 1090785345, 9076, 68174084, 1090785345, 9076, 68174084, 1090785345, 9076, 68174084, 1090785345, 9076, 68174084, 1090785345, 9076, 68174084, 1090785345, 9076, 68174084, 1090785345, 9076, 68174084, 1090785345, 9076, 68174084, 1090785345, 9076, 68174084, 1090785345, 9076, 68174084, 1090785345, 9616, 545392672, 136348168, 9616, 545392672, 136348168, 9616, 545392672, 136348168, 9616, 545392672, 136348168, 9616, 545392672, 136348168, 9616, 545392672, 136348168, 9616, 545392672, 136348168, 9616, 545392672, 136348168, 9616, 545392672, 136348168, 9616, 545392672, 136348168, 9620, 545392672, 
136348168, 9620, 545392672, 136348168, 9620, 545392672, 136348168, 9620, 545392672, 136348168, 9620, 545392672, 136348168, 9620, 545392672, 136348168, 9620, 545392672, 136348168, 9620, 545392672, 136348168, 9620, 545392672, 136348168, 9620, 545392672, 136348168, 9632, 545392672, 136348168, 9632, 545392672, 136348168, 9632, 545392672, 136348168, 9632, 545392672, 136348168, 9632, 545392672, 136348168, 9632, 545392672, 136348168, 9632, 545392672, 136348168, 9632, 545392672, 136348168, 9632, 545392672, 136348168, 9632, 545392672, 136348168, 9636, 545392672, 136348168, 9636, 545392672, 136348168, 9636, 545392672, 136348168, 9636, 545392672, 136348168, 9636, 545392672, 136348168, 9636, 545392672, 136348168, 9636, 545392672, 136348168, 9636, 545392672, 136348168, 9636, 545392672, 136348168, 9636, 545392672, 136348168, 9648, 545392672, 136348168, 9648, 545392672, 136348168, 9648, 545392672, 136348168, 9648, 545392672, 136348168, 9648, 545392672, 136348168, 9648, 545392672, 136348168, 9648, 545392672, 136348168, 9648, 545392672, 136348168, 9648, 545392672, 136348168, 9648, 545392672, 136348168, 9652, 545392672, 136348168, 9652, 545392672, 136348168, 9652, 545392672, 136348168, 9652, 545392672, 136348168, 9652, 545392672, 136348168, 9652, 545392672, 136348168, 9652, 545392672, 136348168, 9652, 545392672, 136348168, 9652, 545392672, 136348168, 9652, 545392672, 136348168, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 
1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12352, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12368, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 
12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 12384, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14336, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 
1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14352, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 14368, 1431655765, 1431655765, 17600, 0, 16777216, 17616, 0, 16777216] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756449273020559665_598_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756449273020559665_598_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e733eae5 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756449273020559665_598_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,250 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 57)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 56))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint 
counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 61)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 52)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 57))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 25)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (231 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 0, 67108864, 9152, 0, 3758096384, 9152, 0, 3758096384, 9152, 0, 3758096384, 9600, 0, 32505856, 9600, 0, 32505856, 9600, 0, 32505856, 9600, 0, 32505856, 9600, 0, 32505856, 10880, 1023, 0, 10880, 1023, 0, 10880, 1023, 0, 10880, 1023, 0, 10880, 1023, 0, 10880, 1023, 0, 10880, 1023, 0, 10880, 1023, 0, 10880, 1023, 0, 10880, 1023, 0, 11792, 16384, 0, 11808, 16384, 
0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13312, 33554431, 0, 13952, 85, 0, 13952, 85, 0, 13952, 85, 0, 13952, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756449314123824166_600_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756449314123824166_600_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..005b51df --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756449314123824166_600_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,236 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 20))) { + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 49)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 62))) { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if 
((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 41)) { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 19)) { + result = (result + 
WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 49)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 108 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4672, 0, 2147483648, 4688, 0, 2147483648, 5376, 0, 2147483648, 5392, 0, 2147483648, 5824, 0, 131072, 6464, 73, 0, 6464, 73, 0, 6464, 73, 0, 7744, 599186, 0, 7744, 599186, 0, 7744, 599186, 0, 7744, 599186, 0, 7744, 599186, 0, 7744, 599186, 0, 7744, 599186, 0, 9216, 268435456, 32768, 9216, 268435456, 32768, 9232, 268435456, 32768, 9232, 268435456, 32768, 9248, 268435456, 32768, 9248, 268435456, 32768, 9856, 32, 0, 10496, 85, 0, 10496, 85, 0, 10496, 85, 0, 10496, 85, 0, 11536, 2097152, 0, 11552, 2097152, 0, 11568, 2097152, 0, 13072, 0, 131072, 13088, 0, 131072, 13104, 0, 131072] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756449332787702715_601_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756449332787702715_601_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1dce76d6 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756449332787702715_601_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,99 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* records of 3 uints: (op id | loop bits, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] is the atomic write cursor into _participant_bit, advanced by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 56))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 59)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); +
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 # same shape as expected_bit_patterns (one GroupSize record); no lane reaches a wave op, so it stays zero + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756449392807331700_603_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756449392807331700_603_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0fcc6766 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756449392807331700_603_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,377 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* records of 3 uints: (op id | loop bits, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] is the atomic write cursor into _participant_bit, advanced by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp +
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 62)) { + if ((WaveGetLaneIndex() >= 40)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 34)) { + result = (result + 
WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((77 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() >= 43)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 24)) || 
(WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 59))) { + if (((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + break; + } + case 3: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 39)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 30)) { + if ((WaveGetLaneIndex() < 23)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((257 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + if ((i4 == 2)) { + break; + } + } + } else { + if ((WaveGetLaneIndex() < 25)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((270 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 3); i5 = (i5 + 1)) { + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((301 << 6) | (counter3 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((326 << 6) | (counter3 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + continue; + } + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 330 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 
272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 3136, 273, 286326784, 3136, 273, 286326784, 3136, 273, 286326784, 3136, 273, 286326784, 3136, 273, 286326784, 3136, 273, 286326784, 3136, 273, 286326784, 3152, 273, 286326784, 3152, 273, 286326784, 3152, 273, 286326784, 3152, 273, 286326784, 3152, 273, 286326784, 3152, 273, 286326784, 3152, 273, 286326784, 3168, 273, 286326784, 3168, 273, 286326784, 3168, 273, 286326784, 3168, 273, 286326784, 3168, 273, 286326784, 3168, 273, 286326784, 3168, 273, 286326784, 6272, 4369, 285212672, 6272, 4369, 285212672, 6272, 4369, 285212672, 6272, 4369, 285212672, 6272, 4369, 285212672, 6272, 4369, 285212672, 6288, 4369, 285212672, 6288, 4369, 285212672, 6288, 4369, 285212672, 6288, 4369, 285212672, 6288, 4369, 285212672, 6288, 4369, 285212672, 6304, 4369, 285212672, 6304, 4369, 285212672, 6304, 4369, 285212672, 6304, 4369, 285212672, 6304, 4369, 285212672, 6304, 4369, 285212672, 7488, 64, 0, 14864, 0, 2290649216, 14864, 0, 2290649216, 14864, 0, 2290649216, 14864, 0, 2290649216, 14864, 0, 2290649216, 14864, 0, 2290649216, 14864, 0, 2290649216, 14880, 0, 2290649216, 14880, 0, 2290649216, 14880, 0, 2290649216, 14880, 0, 2290649216, 14880, 0, 2290649216, 14880, 0, 2290649216, 14880, 0, 2290649216, 15504, 559240, 0, 15504, 559240, 0, 15504, 559240, 0, 15504, 
559240, 0, 15504, 559240, 0, 15520, 559240, 0, 15520, 559240, 0, 15520, 559240, 0, 15520, 559240, 0, 15520, 559240, 0, 19280, 0, 32768, 19284, 0, 32768, 19288, 0, 32768, 19296, 0, 32768, 19300, 0, 32768, 19304, 0, 32768, 20880, 0, 8, 20884, 0, 8, 20888, 0, 8, 20896, 0, 8, 20900, 0, 8, 20904, 0, 8] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756449548905782010_606_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756449548905782010_606_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..715022e8 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756449548905782010_606_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,118 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* records of 3 uints: (op id | loop bits, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] is the atomic write cursor into _participant_bit, advanced by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 53))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] =
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 44))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 # same shape as expected_bit_patterns (one GroupSize record); no lane reaches a wave op, so it stays zero + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756449751980473733_610_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756449751980473733_610_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..525c5c5d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756449751980473733_610_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,325 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* records of 3 uints: (op id | loop bits, ballot.x, ballot.y) */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* [0] is the atomic write cursor into _participant_bit, advanced by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; +
_participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 62))) { + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((76 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((95 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 42))) { + result = 
(result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 45))) { + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 47))) { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() == 51)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 
1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 49)) { + if ((WaveGetLaneIndex() >= 46)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((266 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((275 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((284 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + 
break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 330 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2384, 1073741824, 268435488, 2384, 1073741824, 268435488, 2384, 1073741824, 268435488, 2400, 1073741824, 268435488, 2400, 1073741824, 268435488, 2400, 1073741824, 268435488, 3344, 0, 3067833728, 3344, 0, 3067833728, 3344, 0, 3067833728, 3344, 0, 3067833728, 3344, 0, 3067833728, 3344, 0, 3067833728, 3344, 0, 3067833728, 3344, 0, 3067833728, 3344, 0, 3067833728, 3344, 0, 3067833728, 3344, 0, 3067833728, 3344, 0, 3067833728, 3344, 0, 3067833728, 3344, 0, 3067833728, 3344, 0, 3067833728, 3344, 0, 3067833728, 3344, 0, 3067833728, 3348, 0, 3067833728, 3348, 0, 3067833728, 3348, 0, 3067833728, 3348, 0, 3067833728, 3348, 0, 3067833728, 3348, 0, 3067833728, 3348, 0, 3067833728, 3348, 0, 3067833728, 3348, 0, 3067833728, 3348, 0, 3067833728, 3348, 0, 3067833728, 3348, 0, 3067833728, 3348, 0, 3067833728, 3348, 0, 3067833728, 3348, 0, 3067833728, 3348, 0, 3067833728, 3348, 0, 3067833728, 3360, 0, 3067833728, 3360, 0, 3067833728, 3360, 0, 3067833728, 3360, 0, 3067833728, 3360, 0, 3067833728, 3360, 0, 3067833728, 3360, 0, 3067833728, 3360, 0, 3067833728, 3360, 0, 3067833728, 3360, 0, 3067833728, 3360, 0, 3067833728, 3360, 0, 3067833728, 3360, 0, 3067833728, 3360, 0, 3067833728, 3360, 0, 3067833728, 3360, 0, 3067833728, 3360, 0, 3067833728, 3364, 0, 3067833728, 3364, 0, 3067833728, 3364, 0, 3067833728, 3364, 0, 3067833728, 3364, 0, 3067833728, 3364, 0, 
3067833728, 3364, 0, 3067833728, 3364, 0, 3067833728, 3364, 0, 3067833728, 3364, 0, 3067833728, 3364, 0, 3067833728, 3364, 0, 3067833728, 3364, 0, 3067833728, 3364, 0, 3067833728, 3364, 0, 3067833728, 3364, 0, 3067833728, 3364, 0, 3067833728, 6800, 0, 1024, 6816, 0, 1024, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 13632, 85, 0, 13632, 85, 0, 13632, 85, 0, 13632, 85, 0, 16000, 0, 524288, 16016, 0, 524288, 16032, 0, 524288, 16448, 0, 524288, 16464, 0, 524288, 16480, 0, 524288] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756449829595871083_613_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756449829595871083_613_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..517d5028 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756449829595871083_613_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,238 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((141 << 6) | 
(counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((156 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 43)) { + if ((WaveGetLaneIndex() == 40)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 60)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + 
uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 55)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 16842752, 0, 1600, 16842752, 0, 1616, 16842752, 0, 1616, 16842752, 0, 2816, 4, 65536, 2816, 4, 65536, 2832, 4, 65536, 2832, 4, 65536, 4032, 85, 0, 4032, 85, 0, 4032, 85, 0, 4032, 85, 0, 5248, 17, 0, 5248, 17, 0, 10000, 514, 32, 10000, 514, 32, 10000, 514, 32, 10004, 514, 32, 10004, 514, 32, 10004, 514, 32, 10008, 514, 32, 10008, 514, 32, 10008, 514, 32, 10016, 514, 32, 10016, 514, 32, 10016, 514, 32, 10020, 514, 32, 10020, 514, 32, 10020, 514, 32, 10024, 514, 32, 10024, 514, 32, 10024, 514, 32, 10032, 514, 32, 10032, 514, 32, 10032, 514, 32, 10036, 514, 32, 10036, 514, 32, 10036, 514, 32, 10040, 514, 32, 10040, 514, 32, 10040, 514, 32, 10304, 1145324612, 1145324612, 10304, 1145324612, 1145324612, 10304, 1145324612, 1145324612, 10304, 1145324612, 1145324612, 10304, 1145324612, 1145324612, 10304, 1145324612, 1145324612, 10304, 1145324612, 1145324612, 10304, 1145324612, 1145324612, 10304, 1145324612, 1145324612, 10304, 1145324612, 1145324612, 10304, 1145324612, 1145324612, 10304, 1145324612, 1145324612, 10304, 1145324612, 1145324612, 10304, 1145324612, 1145324612, 10304, 1145324612, 1145324612, 10304, 1145324612, 1145324612, 10944, 8, 0, 12944, 0, 134217728, 12960, 0, 134217728] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] 
+Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756449856615759598_614_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756449856615759598_614_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..418201dc --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756449856615759598_614_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,156 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 10)) { + if ((WaveGetLaneIndex() >= 36)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 29)) { + if ((WaveGetLaneIndex() >= 62)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 50)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 20) || 
(WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 339 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2112, 273, 0, 2112, 273, 0, 2112, 273, 0, 2688, 286331153, 286331153, 2688, 286331153, 286331153, 2688, 286331153, 286331153, 2688, 286331153, 286331153, 2688, 286331153, 286331153, 2688, 286331153, 286331153, 2688, 286331153, 286331153, 2688, 286331153, 286331153, 2688, 286331153, 286331153, 2688, 286331153, 286331153, 2688, 286331153, 286331153, 2688, 286331153, 286331153, 2688, 286331153, 286331153, 2688, 286331153, 286331153, 2688, 286331153, 286331153, 2688, 286331153, 286331153, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 
2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3008, 2004318071, 2004318071, 3456, 559240, 0, 3456, 559240, 0, 3456, 559240, 0, 3456, 559240, 0, 3456, 559240, 0, 5824, 0, 4294705152, 5824, 0, 4294705152, 5824, 0, 4294705152, 5824, 0, 4294705152, 5824, 0, 4294705152, 5824, 0, 4294705152, 5824, 0, 4294705152, 5824, 0, 4294705152, 5824, 0, 4294705152, 5824, 0, 4294705152, 5824, 0, 4294705152, 5824, 0, 4294705152, 5824, 0, 4294705152, 5824, 0, 4294705152, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5568, 1431655765, 87381, 5184, 2, 8, 5184, 2, 8] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 
+ - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756449858351515135_615_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756449858351515135_615_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..876c863b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756449858351515135_615_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,316 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((32 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 47))) { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() 
== 55))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 63))) { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 4)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 53)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 39)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 61))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((237 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((248 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((counter1 == 2)) { + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (266 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 62)) || 
(WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((297 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((322 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 56))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((352 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((367 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 1)) { + continue; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (389 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 300 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2064, 286331153, 286331153, 2064, 286331153, 286331153, 2064, 286331153, 286331153, 2064, 286331153, 286331153, 2064, 286331153, 286331153, 2064, 286331153, 286331153, 2064, 286331153, 286331153, 2064, 286331153, 286331153, 2064, 286331153, 286331153, 2064, 286331153, 286331153, 2064, 286331153, 286331153, 2064, 286331153, 286331153, 2064, 286331153, 286331153, 2064, 286331153, 286331153, 2064, 286331153, 286331153, 2064, 286331153, 286331153, 2080, 286331153, 286331153, 2080, 286331153, 286331153, 2080, 286331153, 286331153, 2080, 286331153, 286331153, 2080, 286331153, 286331153, 2080, 286331153, 286331153, 2080, 286331153, 286331153, 2080, 286331153, 286331153, 2080, 286331153, 286331153, 2080, 286331153, 286331153, 2080, 286331153, 286331153, 2080, 286331153, 286331153, 2080, 286331153, 286331153, 2080, 286331153, 286331153, 2080, 286331153, 286331153, 2080, 286331153, 286331153, 7936, 4, 0, 8512, 4, 0, 9536, 559240, 0, 9536, 559240, 0, 9536, 559240, 0, 9536, 559240, 0, 9536, 559240, 0, 12288, 268435584, 8192, 12288, 268435584, 8192, 12288, 268435584, 8192, 12032, 16779264, 8388608, 12032, 16779264, 8388608, 12032, 16779264, 8388608, 11776, 256, 16384, 11776, 256, 16384, 17024, 4096, 268435712, 17024, 4096, 268435712, 17024, 4096, 268435712, 19008, 196616, 1073743872, 19008, 196616, 1073743872, 19008, 196616, 1073743872, 19008, 196616, 1073743872, 19008, 196616, 1073743872, 19024, 196616, 1073743872, 19024, 196616, 1073743872, 19024, 196616, 1073743872, 19024, 
196616, 1073743872, 19024, 196616, 1073743872, 20608, 520, 16, 20608, 520, 16, 20608, 520, 16, 20612, 520, 16, 20612, 520, 16, 20612, 520, 16, 20616, 520, 16, 20616, 520, 16, 20616, 520, 16, 20624, 520, 16, 20624, 520, 16, 20624, 520, 16, 20628, 520, 16, 20628, 520, 16, 20628, 520, 16, 20632, 520, 16, 20632, 520, 16, 20632, 520, 16, 22528, 0, 16777216, 22532, 0, 16777216, 22536, 0, 16777216, 22544, 0, 16777216, 22548, 0, 16777216, 22552, 0, 16777216, 23488, 2147483776, 0, 23488, 2147483776, 0, 23492, 2147483776, 0, 23492, 2147483776, 0, 23496, 2147483776, 0, 23496, 2147483776, 0, 23504, 2147483776, 0, 23504, 2147483776, 0, 23508, 2147483776, 0, 23508, 2147483776, 0, 23512, 2147483776, 0, 23512, 2147483776, 0, 24896, 262146, 4195328, 24896, 262146, 4195328, 24896, 262146, 4195328, 24896, 262146, 4195328] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756449968232963032_616_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756449968232963032_616_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..405b3f71 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756449968232963032_616_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,133 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((105 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 171 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2240, 0, 256, 2256, 0, 256, 3396, 1, 286330880, 3396, 1, 286330880, 3396, 1, 286330880, 3396, 1, 286330880, 3396, 1, 286330880, 3396, 1, 286330880, 3400, 1, 286330880, 3400, 1, 286330880, 3400, 1, 286330880, 3400, 1, 286330880, 3400, 1, 286330880, 3400, 1, 286330880, 3404, 1, 286330880, 3404, 1, 286330880, 3404, 1, 286330880, 3404, 1, 286330880, 3404, 1, 286330880, 3404, 1, 286330880, 
3412, 1, 286330880, 3412, 1, 286330880, 3412, 1, 286330880, 3412, 1, 286330880, 3412, 1, 286330880, 3412, 1, 286330880, 3416, 1, 286330880, 3416, 1, 286330880, 3416, 1, 286330880, 3416, 1, 286330880, 3416, 1, 286330880, 3416, 1, 286330880, 3420, 1, 286330880, 3420, 1, 286330880, 3420, 1, 286330880, 3420, 1, 286330880, 3420, 1, 286330880, 3420, 1, 286330880, 4288, 1145324612, 1145324612, 4288, 1145324612, 1145324612, 4288, 1145324612, 1145324612, 4288, 1145324612, 1145324612, 4288, 1145324612, 1145324612, 4288, 1145324612, 1145324612, 4288, 1145324612, 1145324612, 4288, 1145324612, 1145324612, 4288, 1145324612, 1145324612, 4288, 1145324612, 1145324612, 4288, 1145324612, 1145324612, 4288, 1145324612, 1145324612, 4288, 1145324612, 1145324612, 4288, 1145324612, 1145324612, 4288, 1145324612, 1145324612, 4288, 1145324612, 1145324612, 6720, 0, 32768, 6736, 0, 32768, 6752, 0, 32768] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450061000917466_619_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450061000917466_619_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..81e8cd90 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450061000917466_619_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,109 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((counter0 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 36 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2128, 536871040, 0, 2128, 536871040, 0, 2144, 536871040, 0, 2144, 536871040, 0, 2832, 2147483648, 8, 2832, 2147483648, 8, 2848, 2147483648, 8, 2848, 2147483648, 8] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450104298890319_621_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450104298890319_621_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7e49efce --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450104298890319_621_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,159 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 42))) { + if ((((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute 
+ Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5648, 0, 128, 5664, 0, 128, 5680, 0, 128, 6272, 85, 0, 6272, 85, 0, 6272, 85, 0, 6272, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450104877034894_622_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450104877034894_622_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2fa272c7 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450104877034894_622_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,191 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 3))) { + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 
50))) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (190 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } else { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 507 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 2192, 17, 286330880, 2192, 17, 286330880, 2192, 17, 286330880, 2192, 17, 286330880, 2192, 17, 286330880, 2192, 17, 286330880, 2192, 17, 286330880, 2208, 17, 286330880, 2208, 17, 286330880, 2208, 17, 286330880, 2208, 17, 286330880, 2208, 17, 286330880, 2208, 17, 286330880, 2208, 17, 286330880, 2896, 69905, 286326784, 2896, 69905, 286326784, 2896, 69905, 
286326784, 2896, 69905, 286326784, 2896, 69905, 286326784, 2896, 69905, 286326784, 2896, 69905, 286326784, 2896, 69905, 286326784, 2896, 69905, 286326784, 2912, 69905, 286326784, 2912, 69905, 286326784, 2912, 69905, 286326784, 2912, 69905, 286326784, 2912, 69905, 286326784, 2912, 69905, 286326784, 2912, 69905, 286326784, 2912, 69905, 286326784, 2912, 69905, 286326784, 8064, 1092, 1145307136, 8064, 1092, 1145307136, 8064, 1092, 1145307136, 8064, 1092, 1145307136, 8064, 1092, 1145307136, 8064, 1092, 1145307136, 8064, 1092, 1145307136, 8768, 4, 1073741824, 8768, 4, 1073741824, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 9792, 1431655765, 1431655765, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 
2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 12160, 2863311530, 2863311530, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765, 11904, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + 
Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450107352113184_624_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450107352113184_624_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d02a520c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450107352113184_624_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,156 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + if ((WaveGetLaneIndex() >= 59)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 49))) { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 29)) { + if ((WaveGetLaneIndex() == 60)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((120 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((137 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i1 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3600, 8390656, 524288, 3600, 8390656, 524288, 3600, 8390656, 524288, 3616, 8390656, 524288, 3616, 8390656, 524288, 3616, 8390656, 524288, 4864, 65535, 3221225472, 4864, 65535, 3221225472, 4864, 65535, 3221225472, 4864, 65535, 3221225472, 4864, 65535, 3221225472, 4864, 65535, 3221225472, 4864, 65535, 3221225472, 4864, 65535, 3221225472, 4864, 65535, 3221225472, 4864, 65535, 3221225472, 4864, 65535, 3221225472, 4864, 65535, 3221225472, 4864, 65535, 3221225472, 4864, 65535, 3221225472, 4864, 65535, 3221225472, 4864, 65535, 3221225472, 4864, 65535, 3221225472, 4864, 65535, 3221225472] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450122195726496_626_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450122195726496_626_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c89417f5 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450122195726496_626_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,195 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 42)) { + if ((WaveGetLaneIndex() == 62)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 49))) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 141 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 1984, 16, 1048576, 1984, 16, 1048576, 2000, 16, 1048576, 2000, 16, 1048576, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 3712, 1145324612, 1145324612, 4864, 0, 2290089984, 4864, 0, 2290089984, 4864, 0, 2290089984, 5504, 17, 0, 5504, 17, 0, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6400, 1145324612, 1145324612, 6848, 559240, 0, 6848, 559240, 0, 6848, 559240, 0, 6848, 559240, 0, 6848, 559240, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450290624730027_630_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450290624730027_630_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4c9700d5 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450290624730027_630_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,301 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 63))) { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 37)) { + if ((WaveGetLaneIndex() == 39)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 16))) { + if ((((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((136 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((151 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 43)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 36)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() >= 61)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((191 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 48))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 33)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 63))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((283 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((290 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (294 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 216 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1856, 146, 0, 1856, 146, 0, 1856, 146, 0, 4480, 146, 
0, 4480, 146, 0, 4480, 146, 0, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 4800, 613566756, 1227133513, 13392, 15, 0, 13392, 15, 0, 13392, 15, 0, 13392, 15, 0, 13408, 15, 0, 13408, 15, 0, 13408, 15, 0, 13408, 15, 0, 14224, 15, 0, 14224, 15, 0, 14224, 15, 0, 14224, 15, 0, 14240, 15, 0, 14240, 15, 0, 14240, 15, 0, 14240, 15, 0, 15296, 15, 0, 15296, 15, 0, 15296, 15, 0, 15296, 15, 0, 15936, 85, 0, 15936, 85, 0, 15936, 85, 0, 15936, 85, 0, 18560, 2730, 0, 18560, 2730, 0, 18560, 2730, 0, 18560, 2730, 0, 18560, 2730, 0, 18560, 2730, 0, 18576, 2730, 0, 18576, 2730, 0, 18576, 2730, 0, 18576, 2730, 0, 18576, 2730, 0, 18576, 2730, 0, 18592, 2730, 0, 18592, 2730, 0, 18592, 2730, 0, 18592, 2730, 0, 18592, 2730, 0, 18592, 2730, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450308636954237_631_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450308636954237_631_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..515edb80 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450308636954237_631_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,170 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 50))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 96 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1168, 0, 1431655424, 1168, 0, 1431655424, 1168, 0, 1431655424, 1168, 0, 1431655424, 1168, 0, 1431655424, 1168, 0, 1431655424, 1168, 0, 1431655424, 1168, 0, 1431655424, 1168, 0, 1431655424, 1168, 0, 1431655424, 1168, 0, 1431655424, 2320, 1, 1426063360, 2320, 1, 1426063360, 2320, 1, 1426063360, 2320, 1, 1426063360, 2320, 1, 1426063360, 3024, 1, 1073741824, 3024, 1, 1073741824, 3600, 340, 0, 3600, 340, 0, 3600, 340, 0, 3600, 340, 0, 6912, 545392672, 136348168, 6912, 545392672, 136348168, 6912, 545392672, 136348168, 6912, 545392672, 136348168, 6912, 545392672, 136348168, 6912, 545392672, 136348168, 6912, 545392672, 136348168, 6912, 545392672, 136348168, 6912, 545392672, 136348168, 6912, 545392672, 136348168] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450310974743424_632_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450310974743424_632_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e3d37ae9 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450310974743424_632_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,118 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 43))) { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = 
(result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns 
+DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450311387606737_633_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450311387606737_633_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..afcf0b73 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450311387606737_633_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,358 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 56)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 62))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 34)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 53))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 49)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 56)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((244 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 5) || 
(WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 9)) { + if ((WaveGetLaneIndex() >= 39)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (271 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 14))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (325 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 12))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 54))) { + 
result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (355 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (359 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 183 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 2496, 511, 4294901760, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 1472, 2863311360, 43690, 3136, 73, 0, 
3136, 73, 0, 3136, 73, 0, 3712, 272696336, 68174084, 3712, 272696336, 68174084, 3712, 272696336, 68174084, 3712, 272696336, 68174084, 3712, 272696336, 68174084, 3712, 272696336, 68174084, 3712, 272696336, 68174084, 3712, 272696336, 68174084, 3712, 272696336, 68174084, 3712, 272696336, 68174084, 8896, 0, 16781312, 8896, 0, 16781312, 12672, 0, 2097152] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450317951062329_634_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450317951062329_634_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5c1b6e59 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450317951062329_634_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,195 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (counter0 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 36)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 50)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 207 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1232, 65, 0, 1232, 65, 0, 1248, 65, 0, 1248, 65, 0, 1264, 65, 0, 
1264, 65, 0, 1680, 0, 272696336, 1680, 0, 272696336, 1680, 0, 272696336, 1680, 0, 272696336, 1680, 0, 272696336, 1696, 0, 272696336, 1696, 0, 272696336, 1696, 0, 272696336, 1696, 0, 272696336, 1696, 0, 272696336, 1712, 0, 272696336, 1712, 0, 272696336, 1712, 0, 272696336, 1712, 0, 272696336, 1712, 0, 272696336, 2240, 272696336, 68174084, 2240, 272696336, 68174084, 2240, 272696336, 68174084, 2240, 272696336, 68174084, 2240, 272696336, 68174084, 2240, 272696336, 68174084, 2240, 272696336, 68174084, 2240, 272696336, 68174084, 2240, 272696336, 68174084, 2240, 272696336, 68174084, 3280, 0, 1090781184, 3280, 0, 1090781184, 3280, 0, 1090781184, 3296, 0, 1090781184, 3296, 0, 1090781184, 3296, 0, 1090781184, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + 
Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450320847508102_635_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450320847508102_635_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..876cac40 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450320847508102_635_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,124 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 26)) { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((61 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 165 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 255, 0, 1024, 255, 0, 1024, 255, 0, 1024, 255, 0, 1024, 255, 0, 1024, 255, 0, 1024, 255, 0, 1024, 255, 0, 2496, 536871040, 1024, 2496, 536871040, 1024, 2496, 536871040, 1024, 2512, 536871040, 1024, 2512, 536871040, 1024, 2512, 536871040, 1024, 2528, 536871040, 1024, 2528, 536871040, 1024, 2528, 536871040, 1024, 
3908, 33024, 2, 3908, 33024, 2, 3908, 33024, 2, 3912, 33024, 2, 3912, 33024, 2, 3912, 33024, 2, 3916, 33024, 2, 3916, 33024, 2, 3916, 33024, 2, 3924, 33024, 2, 3924, 33024, 2, 3924, 33024, 2, 3928, 33024, 2, 3928, 33024, 2, 3928, 33024, 2, 3932, 33024, 2, 3932, 33024, 2, 3932, 33024, 2, 3940, 33024, 2, 3940, 33024, 2, 3940, 33024, 2, 3944, 33024, 2, 3944, 33024, 2, 3944, 33024, 2, 3948, 33024, 2, 3948, 33024, 2, 3948, 33024, 2, 4864, 33562624, 1024, 4864, 33562624, 1024, 4864, 33562624, 1024, 4880, 33562624, 1024, 4880, 33562624, 1024, 4880, 33562624, 1024, 4896, 33562624, 1024, 4896, 33562624, 1024, 4896, 33562624, 1024, 5776, 256, 0, 5792, 256, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450359140326839_637_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450359140326839_637_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8edbdde3 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450359140326839_637_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 56)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 19)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() 
== 5))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4864, 0, 16777216, 4480, 524287, 0, 4480, 524287, 0, 4480, 524287, 0, 4480, 524287, 0, 4480, 524287, 0, 4480, 524287, 0, 4480, 524287, 0, 4480, 524287, 0, 4480, 524287, 0, 4480, 524287, 0, 4480, 524287, 0, 4480, 524287, 0, 4480, 524287, 0, 4480, 524287, 0, 4480, 524287, 0, 4480, 524287, 0, 4480, 524287, 0, 4480, 524287, 0, 4480, 524287, 0, 4096, 4194304, 540672, 4096, 4194304, 540672, 4096, 4194304, 540672, 3840, 67108864, 1073741888, 3840, 67108864, 1073741888, 3840, 67108864, 1073741888, 3456, 16777216, 268435456, 3456, 16777216, 268435456] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + 
Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450463881405467_639_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450463881405467_639_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..338d7078 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450463881405467_639_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,320 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 26)) { + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 40))) { + if (((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 41))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 49))) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((238 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((245 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((255 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((264 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((i0 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 207 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [960, 1, 0, 7360, 268501008, 1048832, 7360, 268501008, 1048832, 7360, 268501008, 1048832, 7360, 268501008, 1048832, 7360, 268501008, 1048832, 10560, 256, 0, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 12544, 2290649224, 2290649224, 12544, 2290649224, 2290649224, 12544, 2290649224, 2290649224, 12544, 2290649224, 2290649224, 12544, 2290649224, 2290649224, 12544, 2290649224, 2290649224, 12544, 
2290649224, 2290649224, 12544, 2290649224, 2290649224, 12544, 2290649224, 2290649224, 12544, 2290649224, 2290649224, 12544, 2290649224, 2290649224, 12544, 2290649224, 2290649224, 12544, 2290649224, 2290649224, 12544, 2290649224, 2290649224, 12544, 2290649224, 2290649224, 12544, 2290649224, 2290649224, 12560, 2290649224, 2290649224, 12560, 2290649224, 2290649224, 12560, 2290649224, 2290649224, 12560, 2290649224, 2290649224, 12560, 2290649224, 2290649224, 12560, 2290649224, 2290649224, 12560, 2290649224, 2290649224, 12560, 2290649224, 2290649224, 12560, 2290649224, 2290649224, 12560, 2290649224, 2290649224, 12560, 2290649224, 2290649224, 12560, 2290649224, 2290649224, 12560, 2290649224, 2290649224, 12560, 2290649224, 2290649224, 12560, 2290649224, 2290649224, 12560, 2290649224, 2290649224, 13696, 136, 2147483648, 13696, 136, 2147483648, 13696, 136, 2147483648, 13712, 136, 2147483648, 13712, 136, 2147483648, 13712, 136, 2147483648, 15680, 34952, 0, 15680, 34952, 0, 15680, 34952, 0, 15680, 34952, 0, 15696, 34952, 0, 15696, 34952, 0, 15696, 34952, 0, 15696, 34952, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450465364151779_640_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450465364151779_640_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cfd19655 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450465364151779_640_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,161 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((34 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((44 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 579 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2196, 699050, 2862612480, 2196, 699050, 2862612480, 2196, 699050, 2862612480, 2196, 699050, 2862612480, 2196, 699050, 2862612480, 2196, 699050, 2862612480, 2196, 699050, 2862612480, 2196, 699050, 2862612480, 2196, 699050, 2862612480, 2196, 699050, 2862612480, 2196, 699050, 2862612480, 2196, 699050, 2862612480, 2196, 699050, 2862612480, 2196, 699050, 2862612480, 2196, 699050, 2862612480, 2196, 699050, 2862612480, 2200, 699050, 2862612480, 2200, 699050, 2862612480, 2200, 699050, 2862612480, 2200, 699050, 2862612480, 2200, 699050, 2862612480, 2200, 699050, 2862612480, 2200, 699050, 2862612480, 2200, 699050, 2862612480, 2200, 699050, 2862612480, 2200, 699050, 2862612480, 2200, 699050, 2862612480, 2200, 699050, 2862612480, 2200, 699050, 2862612480, 2200, 699050, 2862612480, 2200, 699050, 2862612480, 2200, 699050, 2862612480, 2204, 699050, 2862612480, 2204, 699050, 2862612480, 2204, 699050, 2862612480, 2204, 699050, 2862612480, 2204, 699050, 2862612480, 2204, 699050, 2862612480, 2204, 699050, 2862612480, 2204, 699050, 2862612480, 2204, 699050, 2862612480, 2204, 699050, 2862612480, 2204, 699050, 2862612480, 2204, 699050, 2862612480, 2204, 699050, 2862612480, 2204, 699050, 2862612480, 2204, 699050, 2862612480, 2204, 699050, 2862612480, 2212, 699050, 2862612480, 2212, 699050, 2862612480, 2212, 699050, 2862612480, 2212, 699050, 2862612480, 2212, 699050, 2862612480, 2212, 699050, 2862612480, 2212, 699050, 2862612480, 2212, 699050, 2862612480, 2212, 699050, 2862612480, 2212, 699050, 2862612480, 2212, 699050, 2862612480, 2212, 699050, 2862612480, 2212, 699050, 2862612480, 2212, 699050, 2862612480, 2212, 699050, 2862612480, 2212, 699050, 2862612480, 2216, 699050, 2862612480, 2216, 699050, 2862612480, 2216, 699050, 2862612480, 2216, 699050, 2862612480, 2216, 699050, 2862612480, 2216, 699050, 
2862612480, 2216, 699050, 2862612480, 2216, 699050, 2862612480, 2216, 699050, 2862612480, 2216, 699050, 2862612480, 2216, 699050, 2862612480, 2216, 699050, 2862612480, 2216, 699050, 2862612480, 2216, 699050, 2862612480, 2216, 699050, 2862612480, 2216, 699050, 2862612480, 2220, 699050, 2862612480, 2220, 699050, 2862612480, 2220, 699050, 2862612480, 2220, 699050, 2862612480, 2220, 699050, 2862612480, 2220, 699050, 2862612480, 2220, 699050, 2862612480, 2220, 699050, 2862612480, 2220, 699050, 2862612480, 2220, 699050, 2862612480, 2220, 699050, 2862612480, 2220, 699050, 2862612480, 2220, 699050, 2862612480, 2220, 699050, 2862612480, 2220, 699050, 2862612480, 2220, 699050, 2862612480, 2228, 699050, 2862612480, 2228, 699050, 2862612480, 2228, 699050, 2862612480, 2228, 699050, 2862612480, 2228, 699050, 2862612480, 2228, 699050, 2862612480, 2228, 699050, 2862612480, 2228, 699050, 2862612480, 2228, 699050, 2862612480, 2228, 699050, 2862612480, 2228, 699050, 2862612480, 2228, 699050, 2862612480, 2228, 699050, 2862612480, 2228, 699050, 2862612480, 2228, 699050, 2862612480, 2228, 699050, 2862612480, 2232, 699050, 2862612480, 2232, 699050, 2862612480, 2232, 699050, 2862612480, 2232, 699050, 2862612480, 2232, 699050, 2862612480, 2232, 699050, 2862612480, 2232, 699050, 2862612480, 2232, 699050, 2862612480, 2232, 699050, 2862612480, 2232, 699050, 2862612480, 2232, 699050, 2862612480, 2232, 699050, 2862612480, 2232, 699050, 2862612480, 2232, 699050, 2862612480, 2232, 699050, 2862612480, 2232, 699050, 2862612480, 2236, 699050, 2862612480, 2236, 699050, 2862612480, 2236, 699050, 2862612480, 2236, 699050, 2862612480, 2236, 699050, 2862612480, 2236, 699050, 2862612480, 2236, 699050, 2862612480, 2236, 699050, 2862612480, 2236, 699050, 2862612480, 2236, 699050, 2862612480, 2236, 699050, 2862612480, 2236, 699050, 2862612480, 2236, 699050, 2862612480, 2236, 699050, 2862612480, 2236, 699050, 2862612480, 2236, 699050, 2862612480, 4180, 559240, 0, 4180, 559240, 0, 4180, 559240, 0, 4180, 
559240, 0, 4180, 559240, 0, 4184, 559240, 0, 4184, 559240, 0, 4184, 559240, 0, 4184, 559240, 0, 4184, 559240, 0, 4188, 559240, 0, 4188, 559240, 0, 4188, 559240, 0, 4188, 559240, 0, 4188, 559240, 0, 4196, 559240, 0, 4196, 559240, 0, 4196, 559240, 0, 4196, 559240, 0, 4196, 559240, 0, 4200, 559240, 0, 4200, 559240, 0, 4200, 559240, 0, 4200, 559240, 0, 4200, 559240, 0, 4204, 559240, 0, 4204, 559240, 0, 4204, 559240, 0, 4204, 559240, 0, 4204, 559240, 0, 4212, 559240, 0, 4212, 559240, 0, 4212, 559240, 0, 4212, 559240, 0, 4212, 559240, 0, 4216, 559240, 0, 4216, 559240, 0, 4216, 559240, 0, 4216, 559240, 0, 4216, 559240, 0, 4220, 559240, 0, 4220, 559240, 0, 4220, 559240, 0, 4220, 559240, 0, 4220, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450502625792132_641_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450502625792132_641_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f4fbe968 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450502625792132_641_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,195 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 28))) { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 55))) { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (((78 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 55)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((85 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + uint counter3 = 0; + while ((counter3 < 2)) { + 
counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 63)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((171 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((196 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 35)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((203 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 
1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 93 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [13568, 272696336, 68174084, 13568, 272696336, 68174084, 13568, 272696336, 68174084, 13568, 272696336, 68174084, 13568, 272696336, 68174084, 13568, 272696336, 68174084, 13568, 272696336, 68174084, 13568, 272696336, 68174084, 13568, 272696336, 68174084, 13568, 272696336, 68174084, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513, 13888, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450507363876965_642_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450507363876965_642_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f20f9e6b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450507363876965_642_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,366 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 46))) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 26))) { + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (85 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 56))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 48)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() 
< 14)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 53))) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (207 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 60)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 59)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = 
(result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((266 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((275 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (282 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 17)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (308 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 1344, 268501008, 1048832, 1344, 268501008, 1048832, 1344, 268501008, 1048832, 1344, 268501008, 1048832, 1344, 268501008, 1048832, 2368, 1048832, 16781313, 2368, 1048832, 16781313, 2368, 1048832, 16781313, 2368, 1048832, 16781313, 2368, 1048832, 16781313, 7680, 1145324612, 1145324612, 7680, 1145324612, 1145324612, 7680, 1145324612, 1145324612, 7680, 1145324612, 1145324612, 7680, 1145324612, 1145324612, 7680, 1145324612, 1145324612, 7680, 1145324612, 1145324612, 7680, 1145324612, 1145324612, 7680, 1145324612, 1145324612, 7680, 1145324612, 1145324612, 7680, 1145324612, 1145324612, 7680, 1145324612, 1145324612, 7680, 1145324612, 1145324612, 7680, 1145324612, 1145324612, 7680, 1145324612, 1145324612, 7680, 1145324612, 1145324612, 8320, 2184, 0, 8320, 2184, 0, 8320, 2184, 0, 14016, 2184, 0, 14016, 2184, 0, 14016, 2184, 0, 15296, 0, 2281701376, 15296, 0, 2281701376, 18496, 32768, 0, 19136, 85, 0, 19136, 85, 0, 19136, 85, 0, 19136, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + 
- Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450513354036220_643_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450513354036220_643_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..88c88910 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450513354036220_643_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,200 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 34)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 57)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((115 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 55))) { + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (((141 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((151 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((160 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((165 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((172 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((193 << 6) | (counter0 
<< 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 219 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4480, 1073741888, 1048640, 4480, 1073741888, 1048640, 4480, 1073741888, 1048640, 4480, 1073741888, 1048640, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 4096, 2863311530, 2863311530, 3840, 262144, 0, 3584, 0, 4, 3200, 0, 1409286144, 3200, 0, 1409286144, 3200, 0, 1409286144, 5392, 256, 0, 5408, 256, 0, 7376, 8388928, 268435488, 7376, 8388928, 268435488, 7376, 8388928, 268435488, 7376, 8388928, 268435488, 7376, 8388928, 268435488, 7380, 8388928, 268435488, 7380, 8388928, 268435488, 7380, 8388928, 268435488, 7380, 8388928, 268435488, 7380, 8388928, 268435488, 7384, 8388928, 268435488, 7384, 8388928, 268435488, 7384, 8388928, 268435488, 7384, 8388928, 
268435488, 7384, 8388928, 268435488, 7392, 8388928, 268435488, 7392, 8388928, 268435488, 7392, 8388928, 268435488, 7392, 8388928, 268435488, 7392, 8388928, 268435488, 7396, 8388928, 268435488, 7396, 8388928, 268435488, 7396, 8388928, 268435488, 7396, 8388928, 268435488, 7396, 8388928, 268435488, 7400, 8388928, 268435488, 7400, 8388928, 268435488, 7400, 8388928, 268435488, 7400, 8388928, 268435488, 7400, 8388928, 268435488] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450530150976578_644_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450530150976578_644_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..31e164cd --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450530150976578_644_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,214 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 38)) { + if ((WaveGetLaneIndex() >= 35)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 
8) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 55)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: 
{ + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 0, 286331136, 768, 0, 286331136, 768, 0, 286331136, 768, 0, 286331136, 768, 0, 286331136, 768, 0, 286331136, 5648, 16777472, 1, 5648, 16777472, 1, 5648, 16777472, 1, 5664, 16777472, 1, 5664, 16777472, 1, 5664, 16777472, 1, 5680, 16777472, 1, 5680, 16777472, 1, 5680, 16777472, 1, 8976, 32, 2097152, 8976, 32, 2097152, 8992, 32, 2097152, 8992, 32, 2097152, 9280, 1145324612, 1145324612, 9280, 1145324612, 1145324612, 9280, 1145324612, 1145324612, 9280, 1145324612, 1145324612, 9280, 1145324612, 1145324612, 9280, 1145324612, 1145324612, 9280, 1145324612, 1145324612, 9280, 1145324612, 1145324612, 9280, 1145324612, 1145324612, 9280, 1145324612, 1145324612, 9280, 1145324612, 1145324612, 9280, 1145324612, 1145324612, 9280, 1145324612, 1145324612, 9280, 1145324612, 1145324612, 9280, 1145324612, 1145324612, 9280, 1145324612, 1145324612, 9728, 559240, 0, 9728, 559240, 0, 9728, 559240, 0, 9728, 559240, 0, 9728, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + 
Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450532649556760_645_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450532649556760_645_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d9267990 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450532649556760_645_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,190 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 44))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 47))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 
0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((103 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 375 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 
511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 1088, 511, 4294836224, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 511, 4294930432, 3712, 0, 32768, 6592, 32256, 0, 6592, 32256, 0, 6592, 32256, 0, 6592, 32256, 0, 6592, 32256, 0, 6592, 32256, 0, 6596, 32256, 0, 6596, 32256, 0, 6596, 32256, 0, 6596, 32256, 0, 6596, 32256, 0, 6596, 32256, 0, 6600, 32256, 0, 6600, 32256, 0, 6600, 32256, 0, 6600, 32256, 0, 6600, 32256, 0, 6600, 32256, 0, 6608, 32256, 0, 6608, 32256, 0, 6608, 32256, 0, 6608, 32256, 0, 6608, 32256, 0, 6608, 32256, 0, 6612, 32256, 0, 6612, 32256, 0, 6612, 32256, 0, 6612, 32256, 0, 6612, 32256, 0, 6612, 32256, 0, 6616, 32256, 0, 6616, 32256, 0, 6616, 32256, 0, 6616, 32256, 0, 6616, 32256, 0, 6616, 32256, 0, 7680, 85, 0, 7680, 85, 0, 7680, 85, 0, 7680, 85, 0, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 
1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765, 8256, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450616393577312_647_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450616393577312_647_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1861df43 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450616393577312_647_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,146 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 57))) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 11) || 
(WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 29)) { + if ((WaveGetLaneIndex() == 47)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 62)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1920, 520, 2181038080, 1920, 520, 2181038080, 1920, 520, 2181038080, 1920, 520, 2181038080, 2624, 520, 
2181038080, 2624, 520, 2181038080, 2624, 520, 2181038080, 2624, 520, 2181038080, 5312, 0, 536870912, 5328, 0, 536870912] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450616939933040_648_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450616939933040_648_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f9df7477 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450616939933040_648_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,240 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((73 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 54))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 41))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((158 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 204 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1792, 64, 0, 2368, 4195328, 67125252, 2368, 4195328, 67125252, 2368, 4195328, 67125252, 2368, 4195328, 67125252, 2368, 4195328, 67125252, 3584, 4, 1073741824, 3584, 4, 1073741824, 3600, 4, 1073741824, 3600, 4, 1073741824, 3616, 4, 1073741824, 3616, 4, 1073741824, 4672, 67125252, 1074004032, 4672, 67125252, 1074004032, 4672, 67125252, 1074004032, 4672, 67125252, 1074004032, 4672, 67125252, 1074004032, 4672, 67125252, 1074004032, 4676, 67125252, 1074004032, 4676, 67125252, 1074004032, 4676, 67125252, 1074004032, 4676, 67125252, 1074004032, 4676, 67125252, 1074004032, 4676, 67125252, 1074004032, 4688, 67125252, 1074004032, 4688, 67125252, 1074004032, 4688, 67125252, 1074004032, 4688, 67125252, 1074004032, 4688, 67125252, 1074004032, 4688, 67125252, 1074004032, 4692, 67125252, 1074004032, 4692, 67125252, 1074004032, 4692, 67125252, 1074004032, 4692, 67125252, 1074004032, 4692, 67125252, 1074004032, 4692, 67125252, 1074004032, 4704, 67125252, 1074004032, 4704, 67125252, 1074004032, 4704, 67125252, 1074004032, 4704, 
67125252, 1074004032, 4704, 67125252, 1074004032, 4704, 67125252, 1074004032, 4708, 67125252, 1074004032, 4708, 67125252, 1074004032, 4708, 67125252, 1074004032, 4708, 67125252, 1074004032, 4708, 67125252, 1074004032, 4708, 67125252, 1074004032, 5568, 16388, 1073741824, 5568, 16388, 1073741824, 5568, 16388, 1073741824, 5584, 16388, 1073741824, 5584, 16388, 1073741824, 5584, 16388, 1073741824, 5600, 16388, 1073741824, 5600, 16388, 1073741824, 5600, 16388, 1073741824, 6208, 559240, 0, 6208, 559240, 0, 6208, 559240, 0, 6208, 559240, 0, 6208, 559240, 0, 8704, 0, 4194304, 8720, 0, 4194304, 9152, 32768, 0, 9168, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450630813763759_650_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450630813763759_650_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..50cff663 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450630813763759_650_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,189 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 60)) { + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 19)) { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 35)) { + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 45))) { + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 51))) { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 59))) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 33 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1664, 17, 0, 1664, 17, 0, 2304, 34, 0, 2304, 34, 0, 3840, 34, 0, 3840, 34, 0, 11456, 559240, 0, 11456, 559240, 0, 11456, 559240, 0, 11456, 559240, 0, 11456, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - 
Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450631309528224_651_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450631309528224_651_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f065b186 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450631309528224_651_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,430 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 47))) { + if 
(((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 53)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 42))) { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 59))) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if 
((((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 45))) { + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 49))) { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 25))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 51)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 43))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((285 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((296 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((305 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 11) 
|| (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((318 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 59))) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (342 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((360 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((371 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 44)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((388 << 6) | (i3 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 32)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((395 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (402 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (406 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 402 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 2304, 0, 1224736768, 2304, 0, 1224736768, 2304, 0, 1224736768, 2752, 2340, 0, 2752, 2340, 0, 2752, 2340, 0, 2752, 2340, 0, 8640, 0, 2097152, 15040, 0, 286261248, 15040, 0, 286261248, 15040, 0, 286261248, 15680, 1, 0, 16256, 268501008, 1048832, 16256, 268501008, 1048832, 16256, 
268501008, 1048832, 16256, 268501008, 1048832, 16256, 268501008, 1048832, 16576, 1048832, 16781313, 16576, 1048832, 16781313, 16576, 1048832, 16781313, 16576, 1048832, 16781313, 16576, 1048832, 16781313, 18240, 0, 572653568, 18240, 0, 572653568, 18240, 0, 572653568, 18240, 0, 572653568, 18256, 0, 572653568, 18256, 0, 572653568, 18256, 0, 572653568, 18256, 0, 572653568, 18272, 0, 572653568, 18272, 0, 572653568, 18272, 0, 572653568, 18272, 0, 572653568, 18944, 8738, 536870912, 18944, 8738, 536870912, 18944, 8738, 536870912, 18944, 8738, 536870912, 18944, 8738, 536870912, 18960, 8738, 536870912, 18960, 8738, 536870912, 18960, 8738, 536870912, 18960, 8738, 536870912, 18960, 8738, 536870912, 18976, 8738, 536870912, 18976, 8738, 536870912, 18976, 8738, 536870912, 18976, 8738, 536870912, 18976, 8738, 536870912, 19520, 572653568, 546, 19520, 572653568, 546, 19520, 572653568, 546, 19520, 572653568, 546, 19520, 572653568, 546, 19520, 572653568, 546, 19520, 572653568, 546, 19536, 572653568, 546, 19536, 572653568, 546, 19536, 572653568, 546, 19536, 572653568, 546, 19536, 572653568, 546, 19536, 572653568, 546, 19536, 572653568, 546, 19552, 572653568, 546, 19552, 572653568, 546, 19552, 572653568, 546, 19552, 572653568, 546, 19552, 572653568, 546, 19552, 572653568, 546, 19552, 572653568, 546, 20352, 546, 572653568, 20352, 546, 572653568, 20352, 546, 572653568, 20352, 546, 572653568, 20352, 546, 572653568, 20352, 546, 572653568, 20352, 546, 572653568, 20368, 546, 572653568, 20368, 546, 572653568, 20368, 546, 572653568, 20368, 546, 572653568, 20368, 546, 572653568, 20368, 546, 572653568, 20368, 546, 572653568, 20384, 546, 572653568, 20384, 546, 572653568, 20384, 546, 572653568, 20384, 546, 572653568, 20384, 546, 572653568, 20384, 546, 572653568, 20384, 546, 572653568, 21888, 64, 0, 23056, 1024, 67108864, 23056, 1024, 67108864, 23072, 1024, 67108864, 23072, 1024, 67108864, 23088, 1024, 67108864, 23088, 1024, 67108864, 23760, 0, 67108864, 23776, 0, 67108864, 23792, 0, 67108864, 
24832, 0, 1074003968, 24832, 0, 1074003968, 24848, 0, 1074003968, 24848, 0, 1074003968, 24864, 0, 1074003968, 24864, 0, 1074003968, 25280, 67125252, 0, 25280, 67125252, 0, 25280, 67125252, 0, 25296, 67125252, 0, 25296, 67125252, 0, 25296, 67125252, 0, 25312, 67125252, 0, 25312, 67125252, 0, 25312, 67125252, 0, 25728, 559240, 0, 25728, 559240, 0, 25728, 559240, 0, 25728, 559240, 0, 25728, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450677037662095_652_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450677037662095_652_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..93909a95 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450677037662095_652_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,269 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 25)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 8))) { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 59)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((95 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((106 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 60))) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 44))) { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 54))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (230 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 25)) || 
(WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((276 << 6) | (counter3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((287 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 171 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 19173961, 0, 1088, 19173961, 0, 1088, 19173961, 0, 1088, 19173961, 0, 1088, 19173961, 0, 1088, 19173961, 0, 1088, 19173961, 0, 1088, 19173961, 0, 1088, 19173961, 0, 1104, 19173961, 0, 1104, 19173961, 0, 1104, 19173961, 0, 1104, 19173961, 0, 1104, 19173961, 0, 1104, 19173961, 0, 1104, 19173961, 0, 1104, 19173961, 0, 1104, 19173961, 0, 6784, 0, 2181570560, 6784, 0, 2181570560, 6784, 0, 2181570560, 6784, 0, 2181570560, 6800, 0, 2181570560, 6800, 0, 2181570560, 6800, 0, 2181570560, 6800, 0, 2181570560, 7744, 272696336, 68174084, 7744, 272696336, 68174084, 7744, 
272696336, 68174084, 7744, 272696336, 68174084, 7744, 272696336, 68174084, 7744, 272696336, 68174084, 7744, 272696336, 68174084, 7744, 272696336, 68174084, 7744, 272696336, 68174084, 7744, 272696336, 68174084, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513, 8064, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450742280702274_656_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450742280702274_656_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ad8a6ced --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450742280702274_656_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,186 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 62))) { + if ((((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 38))) { + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 27))) { + result = 
(result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 402 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 17, 0, 1792, 17, 0, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 
1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 2688, 1717986918, 1717986918, 3136, 978670, 0, 3136, 978670, 0, 3136, 978670, 0, 3136, 978670, 0, 3136, 978670, 0, 3136, 978670, 0, 3136, 978670, 0, 3136, 978670, 0, 3136, 978670, 0, 3136, 978670, 0, 3136, 978670, 0, 3136, 978670, 0, 3136, 978670, 0, 3136, 978670, 0, 3136, 978670, 0, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10176, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 
4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10192, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688, 10208, 262143, 4286578688] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450836564308484_658_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450836564308484_658_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..da1f9265 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450836564308484_658_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,68 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 51))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2624, 0, 268435456] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450838541296314_660_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450838541296314_660_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..287af227 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450838541296314_660_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,117 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 60))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 129 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 1, 2415919104, 1280, 1, 2415919104, 1280, 1, 2415919104, 2304, 33288, 2147483648, 2304, 33288, 2147483648, 2304, 33288, 2147483648, 2304, 33288, 2147483648, 3008, 585, 2147483648, 3008, 585, 2147483648, 3008, 585, 2147483648, 3008, 585, 2147483648, 3008, 585, 2147483648, 3584, 272696336, 68174084, 3584, 272696336, 68174084, 3584, 272696336, 68174084, 3584, 272696336, 68174084, 3584, 272696336, 68174084, 3584, 272696336, 68174084, 3584, 272696336, 68174084, 3584, 272696336, 68174084, 3584, 272696336, 68174084, 3584, 272696336, 68174084, 3904, 613566756, 1227133513, 3904, 613566756, 1227133513, 3904, 613566756, 1227133513, 3904, 613566756, 1227133513, 3904, 613566756, 1227133513, 3904, 613566756, 1227133513, 3904, 613566756, 1227133513, 3904, 613566756, 1227133513, 3904, 613566756, 1227133513, 3904, 613566756, 1227133513, 3904, 613566756, 1227133513, 3904, 
613566756, 1227133513, 3904, 613566756, 1227133513, 3904, 613566756, 1227133513, 3904, 613566756, 1227133513, 3904, 613566756, 1227133513, 3904, 613566756, 1227133513, 3904, 613566756, 1227133513, 3904, 613566756, 1227133513, 3904, 613566756, 1227133513, 3904, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450838979566947_661_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450838979566947_661_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..953b7cb2 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450838979566947_661_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,259 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch 
((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 55))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 8, 0, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2752, 85, 0, 2752, 85, 0, 2752, 85, 0, 2752, 85, 0, 4224, 17, 0, 4224, 17, 0, 9856, 559240, 0, 9856, 559240, 0, 
9856, 559240, 0, 9856, 559240, 0, 9856, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450873205255552_663_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450873205255552_663_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..13b23c11 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450873205255552_663_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,160 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 47))) { + if (((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 21)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((97 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() >= 52)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((109 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 195 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3264, 286331153, 286331153, 3264, 286331153, 286331153, 3264, 286331153, 286331153, 3264, 286331153, 286331153, 3264, 286331153, 286331153, 3264, 286331153, 286331153, 3264, 286331153, 286331153, 3264, 286331153, 286331153, 3264, 286331153, 286331153, 3264, 286331153, 286331153, 3264, 286331153, 286331153, 3264, 286331153, 286331153, 3264, 286331153, 286331153, 3264, 286331153, 286331153, 3264, 286331153, 286331153, 3264, 286331153, 286331153, 4224, 139810, 0, 4224, 139810, 0, 4224, 139810, 0, 4224, 139810, 0, 4224, 139810, 0, 4240, 139810, 0, 4240, 139810, 0, 4240, 139810, 0, 4240, 139810, 0, 4240, 139810, 0, 6208, 139264, 0, 6208, 139264, 0, 6212, 139264, 0, 6212, 139264, 0, 6216, 139264, 0, 6216, 139264, 0, 6224, 139264, 0, 6224, 139264, 0, 6228, 139264, 0, 6228, 139264, 0, 6232, 139264, 0, 6232, 139264, 0, 6976, 0, 572522496, 6976, 0, 572522496, 6976, 0, 572522496, 6992, 0, 572522496, 6992, 0, 572522496, 6992, 0, 572522496, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 
1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7936, 559240, 0, 7936, 559240, 0, 7936, 559240, 0, 7936, 559240, 0, 7936, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450899928916737_665_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450899928916737_665_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..529ba272 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450899928916737_665_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,218 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 53))) { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 31)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 32)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((189 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 399 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 15, 4227858432, 1088, 15, 4227858432, 1088, 15, 4227858432, 1088, 15, 4227858432, 1088, 15, 4227858432, 1088, 15, 4227858432, 1088, 15, 4227858432, 1088, 15, 4227858432, 1088, 15, 4227858432, 1088, 15, 4227858432, 3972, 0, 16777216, 3976, 0, 16777216, 3988, 0, 16777216, 3992, 0, 16777216, 4800, 65, 0, 4800, 65, 0, 5376, 272696336, 68174084, 5376, 272696336, 68174084, 5376, 272696336, 68174084, 5376, 272696336, 68174084, 5376, 272696336, 68174084, 5376, 272696336, 68174084, 5376, 272696336, 68174084, 5376, 272696336, 68174084, 5376, 272696336, 68174084, 5376, 272696336, 68174084, 5696, 68174084, 1090785345, 5696, 68174084, 1090785345, 5696, 68174084, 1090785345, 5696, 68174084, 1090785345, 5696, 68174084, 1090785345, 5696, 68174084, 1090785345, 5696, 68174084, 1090785345, 5696, 68174084, 1090785345, 5696, 68174084, 1090785345, 5696, 68174084, 1090785345, 5696, 68174084, 1090785345, 
10240, 715827882, 0, 10240, 715827882, 0, 10240, 715827882, 0, 10240, 715827882, 0, 10240, 715827882, 0, 10240, 715827882, 0, 10240, 715827882, 0, 10240, 715827882, 0, 10240, 715827882, 0, 10240, 715827882, 0, 10240, 715827882, 0, 10240, 715827882, 0, 10240, 715827882, 0, 10240, 715827882, 0, 10240, 715827882, 0, 10256, 715827882, 0, 10256, 715827882, 0, 10256, 715827882, 0, 10256, 715827882, 0, 10256, 715827882, 0, 10256, 715827882, 0, 10256, 715827882, 0, 10256, 715827882, 0, 10256, 715827882, 0, 10256, 715827882, 0, 10256, 715827882, 0, 10256, 715827882, 0, 10256, 715827882, 0, 10256, 715827882, 0, 10256, 715827882, 0, 10272, 715827882, 0, 10272, 715827882, 0, 10272, 715827882, 0, 10272, 715827882, 0, 10272, 715827882, 0, 10272, 715827882, 0, 10272, 715827882, 0, 10272, 715827882, 0, 10272, 715827882, 0, 10272, 715827882, 0, 10272, 715827882, 0, 10272, 715827882, 0, 10272, 715827882, 0, 10272, 715827882, 0, 10272, 715827882, 0, 10688, 2863311530, 0, 10688, 2863311530, 0, 10688, 2863311530, 0, 10688, 2863311530, 0, 10688, 2863311530, 0, 10688, 2863311530, 0, 10688, 2863311530, 0, 10688, 2863311530, 0, 10688, 2863311530, 0, 10688, 2863311530, 0, 10688, 2863311530, 0, 10688, 2863311530, 0, 10688, 2863311530, 0, 10688, 2863311530, 0, 10688, 2863311530, 0, 10688, 2863311530, 0, 10704, 2863311530, 0, 10704, 2863311530, 0, 10704, 2863311530, 0, 10704, 2863311530, 0, 10704, 2863311530, 0, 10704, 2863311530, 0, 10704, 2863311530, 0, 10704, 2863311530, 0, 10704, 2863311530, 0, 10704, 2863311530, 0, 10704, 2863311530, 0, 10704, 2863311530, 0, 10704, 2863311530, 0, 10704, 2863311530, 0, 10704, 2863311530, 0, 10704, 2863311530, 0, 10720, 2863311530, 0, 10720, 2863311530, 0, 10720, 2863311530, 0, 10720, 2863311530, 0, 10720, 2863311530, 0, 10720, 2863311530, 0, 10720, 2863311530, 0, 10720, 2863311530, 0, 10720, 2863311530, 0, 10720, 2863311530, 0, 10720, 2863311530, 0, 10720, 2863311530, 0, 10720, 2863311530, 0, 10720, 2863311530, 0, 10720, 2863311530, 0, 10720, 2863311530, 
0, 12112, 536870928, 1024, 12112, 536870928, 1024, 12112, 536870928, 1024] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756450921433232313_666_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756450921433232313_666_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7c47d3ec --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756450921433232313_666_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,374 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 58)) { + if ((WaveGetLaneIndex() == 36)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((175 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((227 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((238 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 2)) { + break; + } + } + break; + } + } + if ((WaveGetLaneIndex() >= 58)) { + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 4))) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((277 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((294 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((303 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter4 == 1)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 564 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1344, 272696336, 68174084, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 1664, 68174084, 1090785345, 10320, 65536, 256, 10320, 
65536, 256, 10336, 65536, 256, 10336, 65536, 256, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 12800, 1717986918, 1717986918, 14528, 2184, 2290649088, 14528, 2184, 2290649088, 14528, 2184, 2290649088, 14528, 2184, 2290649088, 14528, 2184, 2290649088, 14528, 2184, 2290649088, 14528, 2184, 2290649088, 14528, 2184, 2290649088, 14528, 2184, 2290649088, 14532, 2184, 2290649088, 14532, 2184, 2290649088, 14532, 2184, 2290649088, 14532, 2184, 2290649088, 14532, 2184, 2290649088, 14532, 2184, 2290649088, 14532, 2184, 2290649088, 14532, 2184, 2290649088, 14532, 2184, 2290649088, 14536, 2184, 2290649088, 14536, 2184, 2290649088, 14536, 2184, 2290649088, 14536, 2184, 2290649088, 14536, 2184, 2290649088, 14536, 2184, 2290649088, 14536, 2184, 2290649088, 14536, 2184, 2290649088, 14536, 2184, 2290649088, 14544, 2184, 2290649088, 14544, 2184, 2290649088, 14544, 2184, 2290649088, 14544, 2184, 2290649088, 14544, 2184, 2290649088, 14544, 2184, 2290649088, 14544, 2184, 2290649088, 14544, 2184, 2290649088, 14544, 2184, 2290649088, 14548, 2184, 2290649088, 14548, 2184, 2290649088, 14548, 
2184, 2290649088, 14548, 2184, 2290649088, 14548, 2184, 2290649088, 14548, 2184, 2290649088, 14548, 2184, 2290649088, 14548, 2184, 2290649088, 14548, 2184, 2290649088, 14552, 2184, 2290649088, 14552, 2184, 2290649088, 14552, 2184, 2290649088, 14552, 2184, 2290649088, 14552, 2184, 2290649088, 14552, 2184, 2290649088, 14552, 2184, 2290649088, 14552, 2184, 2290649088, 14552, 2184, 2290649088, 14560, 2184, 2290649088, 14560, 2184, 2290649088, 14560, 2184, 2290649088, 14560, 2184, 2290649088, 14560, 2184, 2290649088, 14560, 2184, 2290649088, 14560, 2184, 2290649088, 14560, 2184, 2290649088, 14560, 2184, 2290649088, 14564, 2184, 2290649088, 14564, 2184, 2290649088, 14564, 2184, 2290649088, 14564, 2184, 2290649088, 14564, 2184, 2290649088, 14564, 2184, 2290649088, 14564, 2184, 2290649088, 14564, 2184, 2290649088, 14564, 2184, 2290649088, 14568, 2184, 2290649088, 14568, 2184, 2290649088, 14568, 2184, 2290649088, 14568, 2184, 2290649088, 14568, 2184, 2290649088, 14568, 2184, 2290649088, 14568, 2184, 2290649088, 14568, 2184, 2290649088, 14568, 2184, 2290649088, 15232, 8, 2290089984, 15232, 8, 2290089984, 15232, 8, 2290089984, 15232, 8, 2290089984, 15236, 8, 2290089984, 15236, 8, 2290089984, 15236, 8, 2290089984, 15236, 8, 2290089984, 15240, 8, 2290089984, 15240, 8, 2290089984, 15240, 8, 2290089984, 15240, 8, 2290089984, 15248, 8, 2290089984, 15248, 8, 2290089984, 15248, 8, 2290089984, 15248, 8, 2290089984, 15252, 8, 2290089984, 15252, 8, 2290089984, 15252, 8, 2290089984, 15252, 8, 2290089984, 15256, 8, 2290089984, 15256, 8, 2290089984, 15256, 8, 2290089984, 15256, 8, 2290089984, 15264, 8, 2290089984, 15264, 8, 2290089984, 15264, 8, 2290089984, 15264, 8, 2290089984, 15268, 8, 2290089984, 15268, 8, 2290089984, 15268, 8, 2290089984, 15268, 8, 2290089984, 15272, 8, 2290089984, 15272, 8, 2290089984, 15272, 8, 2290089984, 15272, 8, 2290089984, 18832, 0, 2818572288, 18832, 0, 2818572288, 18832, 0, 2818572288, 18836, 0, 2818572288, 18836, 0, 2818572288, 18836, 0, 2818572288, 19408, 
0, 1409286144, 19408, 0, 1409286144, 19408, 0, 1409286144, 19412, 0, 1409286144, 19412, 0, 1409286144, 19412, 0, 1409286144] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451224219253138_671_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451224219253138_671_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d9acdcfa --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451224219253138_671_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,138 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((26 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 33)) { + if ((WaveGetLaneIndex() == 35)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: 
UInt32 + Stride: 4 + Fill: 0 + Size: 213 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1664, 572662306, 572662306, 1664, 572662306, 572662306, 1664, 572662306, 572662306, 1664, 572662306, 572662306, 1664, 572662306, 572662306, 1664, 572662306, 572662306, 1664, 572662306, 572662306, 1664, 572662306, 572662306, 1664, 572662306, 572662306, 1664, 572662306, 572662306, 1664, 572662306, 572662306, 1664, 572662306, 572662306, 1664, 572662306, 572662306, 1664, 572662306, 572662306, 1664, 572662306, 572662306, 1664, 572662306, 572662306, 1680, 572662306, 572662306, 1680, 572662306, 572662306, 1680, 572662306, 572662306, 1680, 572662306, 572662306, 1680, 572662306, 572662306, 1680, 572662306, 572662306, 1680, 572662306, 572662306, 1680, 572662306, 572662306, 1680, 572662306, 572662306, 1680, 572662306, 572662306, 1680, 572662306, 572662306, 1680, 572662306, 572662306, 1680, 572662306, 572662306, 1680, 572662306, 572662306, 1680, 572662306, 572662306, 1680, 572662306, 572662306, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3072, 1717986918, 1717986918, 3520, 
559240, 0, 3520, 559240, 0, 3520, 559240, 0, 3520, 559240, 0, 3520, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451233474701954_674_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451233474701954_674_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..30d63d3d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451233474701954_674_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 159 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2176, 16777216, 524288, 2176, 16777216, 524288, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1920, 1414878549, 1431655765, 1536, 699050, 2862786560, 1536, 699050, 2862786560, 1536, 699050, 2862786560, 1536, 699050, 2862786560, 1536, 699050, 2862786560, 1536, 699050, 2862786560, 1536, 699050, 2862786560, 1536, 699050, 2862786560, 1536, 699050, 2862786560, 1536, 699050, 2862786560, 1536, 699050, 2862786560, 1536, 699050, 2862786560, 
1536, 699050, 2862786560, 1536, 699050, 2862786560, 1536, 699050, 2862786560, 1536, 699050, 2862786560, 1536, 699050, 2862786560, 1536, 699050, 2862786560, 1536, 699050, 2862786560, 1536, 699050, 2862786560] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451233891946640_675_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451233891946640_675_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c8b693d9 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451233891946640_675_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,249 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 4)) { + if ((WaveGetLaneIndex() >= 37)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() >= 35)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 51))) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 54)) { + 
result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 53)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 258 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2704, 136348168, 2181570690, 2704, 136348168, 2181570690, 2704, 136348168, 2181570690, 2704, 136348168, 2181570690, 2704, 136348168, 
2181570690, 2704, 136348168, 2181570690, 2704, 136348168, 2181570690, 2704, 136348168, 2181570690, 2704, 136348168, 2181570690, 2704, 136348168, 2181570690, 2704, 136348168, 2181570690, 3456, 272696336, 68174084, 3456, 272696336, 68174084, 3456, 272696336, 68174084, 3456, 272696336, 68174084, 3456, 272696336, 68174084, 3456, 272696336, 68174084, 3456, 272696336, 68174084, 3456, 272696336, 68174084, 3456, 272696336, 68174084, 3456, 272696336, 68174084, 5632, 2048, 0, 9792, 0, 4194304, 9408, 8454176, 16809984, 9408, 8454176, 16809984, 9408, 8454176, 16809984, 9408, 8454176, 16809984, 9408, 8454176, 16809984, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 9024, 2854922890, 2863278762, 8640, 21845, 1410678784, 8640, 21845, 1410678784, 8640, 21845, 1410678784, 8640, 21845, 1410678784, 8640, 21845, 1410678784, 8640, 21845, 1410678784, 8640, 21845, 1410678784, 8640, 21845, 1410678784, 8640, 21845, 1410678784, 8640, 21845, 1410678784, 8640, 21845, 1410678784, 8640, 21845, 1410678784, 8640, 21845, 1410678784, 8640, 21845, 1410678784, 8640, 21845, 1410678784, 10432, 85, 0, 10432, 85, 0, 10432, 85, 0, 10432, 85, 0, 12608, 545392672, 136348168, 12608, 545392672, 136348168, 12608, 545392672, 
136348168, 12608, 545392672, 136348168, 12608, 545392672, 136348168, 12608, 545392672, 136348168, 12608, 545392672, 136348168, 12608, 545392672, 136348168, 12608, 545392672, 136348168, 12608, 545392672, 136348168] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451239122423285_676_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451239122423285_676_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..310c3fba --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451239122423285_676_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,249 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) 
{ + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 52)) { + if ((WaveGetLaneIndex() < 27)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 61))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + 
if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 225 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 136348168, 2181570690, 1792, 136348168, 2181570690, 1792, 
136348168, 2181570690, 1792, 136348168, 2181570690, 1792, 136348168, 2181570690, 1792, 136348168, 2181570690, 1792, 136348168, 2181570690, 1792, 136348168, 2181570690, 1792, 136348168, 2181570690, 1792, 136348168, 2181570690, 1792, 136348168, 2181570690, 9984, 272696336, 68174084, 9984, 272696336, 68174084, 9984, 272696336, 68174084, 9984, 272696336, 68174084, 9984, 272696336, 68174084, 9984, 272696336, 68174084, 9984, 272696336, 68174084, 9984, 272696336, 68174084, 9984, 272696336, 68174084, 9984, 272696336, 68174084, 11024, 545392672, 136348168, 11024, 545392672, 136348168, 11024, 545392672, 136348168, 11024, 545392672, 136348168, 11024, 545392672, 136348168, 11024, 545392672, 136348168, 11024, 545392672, 136348168, 11024, 545392672, 136348168, 11024, 545392672, 136348168, 11024, 545392672, 136348168, 11040, 545392672, 136348168, 11040, 545392672, 136348168, 11040, 545392672, 136348168, 11040, 545392672, 136348168, 11040, 545392672, 136348168, 11040, 545392672, 136348168, 11040, 545392672, 136348168, 11040, 545392672, 136348168, 11040, 545392672, 136348168, 11040, 545392672, 136348168, 11648, 73, 0, 11648, 73, 0, 11648, 73, 0, 12224, 272696336, 68174084, 12224, 272696336, 68174084, 12224, 272696336, 68174084, 12224, 272696336, 68174084, 12224, 272696336, 68174084, 12224, 272696336, 68174084, 12224, 272696336, 68174084, 12224, 272696336, 68174084, 12224, 272696336, 68174084, 12224, 272696336, 68174084, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 12544, 613566756, 1227133513, 
12544, 613566756, 1227133513, 12544, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451242929532437_677_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451242929532437_677_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7da7bfa3 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451242929532437_677_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,151 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 57)) { + if ((WaveGetLaneIndex() >= 54)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((62 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 456 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 0, 2449473536, 768, 0, 2449473536, 768, 0, 2449473536, 1216, 585, 0, 1216, 585, 0, 1216, 585, 0, 1216, 585, 0, 3412, 136348168, 532610, 3412, 136348168, 532610, 3412, 136348168, 532610, 3412, 136348168, 532610, 3412, 136348168, 532610, 3412, 136348168, 532610, 3412, 136348168, 532610, 3412, 136348168, 532610, 3412, 136348168, 532610, 3416, 136348168, 532610, 3416, 136348168, 532610, 3416, 136348168, 532610, 3416, 136348168, 532610, 3416, 136348168, 532610, 3416, 136348168, 532610, 3416, 136348168, 532610, 3416, 136348168, 532610, 3416, 136348168, 532610, 3420, 136348168, 532610, 3420, 136348168, 532610, 3420, 136348168, 532610, 3420, 136348168, 532610, 3420, 136348168, 532610, 3420, 136348168, 532610, 3420, 136348168, 532610, 3420, 136348168, 532610, 3420, 136348168, 532610, 3428, 136348168, 532610, 3428, 136348168, 532610, 3428, 136348168, 532610, 3428, 136348168, 532610, 3428, 136348168, 532610, 3428, 136348168, 532610, 3428, 136348168, 532610, 3428, 136348168, 532610, 3428, 136348168, 532610, 3432, 136348168, 532610, 3432, 136348168, 532610, 3432, 136348168, 532610, 3432, 136348168, 532610, 3432, 136348168, 532610, 3432, 136348168, 532610, 3432, 136348168, 532610, 3432, 136348168, 532610, 3432, 136348168, 532610, 3436, 136348168, 532610, 3436, 136348168, 532610, 3436, 136348168, 532610, 3436, 136348168, 
532610, 3436, 136348168, 532610, 3436, 136348168, 532610, 3436, 136348168, 532610, 3436, 136348168, 532610, 3436, 136348168, 532610, 3988, 1090785345, 4260880, 3988, 1090785345, 4260880, 3988, 1090785345, 4260880, 3988, 1090785345, 4260880, 3988, 1090785345, 4260880, 3988, 1090785345, 4260880, 3988, 1090785345, 4260880, 3988, 1090785345, 4260880, 3988, 1090785345, 4260880, 3988, 1090785345, 4260880, 3992, 1090785345, 4260880, 3992, 1090785345, 4260880, 3992, 1090785345, 4260880, 3992, 1090785345, 4260880, 3992, 1090785345, 4260880, 3992, 1090785345, 4260880, 3992, 1090785345, 4260880, 3992, 1090785345, 4260880, 3992, 1090785345, 4260880, 3992, 1090785345, 4260880, 3996, 1090785345, 4260880, 3996, 1090785345, 4260880, 3996, 1090785345, 4260880, 3996, 1090785345, 4260880, 3996, 1090785345, 4260880, 3996, 1090785345, 4260880, 3996, 1090785345, 4260880, 3996, 1090785345, 4260880, 3996, 1090785345, 4260880, 3996, 1090785345, 4260880, 4004, 1090785345, 4260880, 4004, 1090785345, 4260880, 4004, 1090785345, 4260880, 4004, 1090785345, 4260880, 4004, 1090785345, 4260880, 4004, 1090785345, 4260880, 4004, 1090785345, 4260880, 4004, 1090785345, 4260880, 4004, 1090785345, 4260880, 4004, 1090785345, 4260880, 4008, 1090785345, 4260880, 4008, 1090785345, 4260880, 4008, 1090785345, 4260880, 4008, 1090785345, 4260880, 4008, 1090785345, 4260880, 4008, 1090785345, 4260880, 4008, 1090785345, 4260880, 4008, 1090785345, 4260880, 4008, 1090785345, 4260880, 4008, 1090785345, 4260880, 4012, 1090785345, 4260880, 4012, 1090785345, 4260880, 4012, 1090785345, 4260880, 4012, 1090785345, 4260880, 4012, 1090785345, 4260880, 4012, 1090785345, 4260880, 4012, 1090785345, 4260880, 4012, 1090785345, 4260880, 4012, 1090785345, 4260880, 4012, 1090785345, 4260880, 5504, 272696336, 68174084, 5504, 272696336, 68174084, 5504, 272696336, 68174084, 5504, 272696336, 68174084, 5504, 272696336, 68174084, 5504, 272696336, 68174084, 5504, 272696336, 68174084, 5504, 272696336, 68174084, 5504, 272696336, 68174084, 
5504, 272696336, 68174084, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513, 5824, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451247740693977_678_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451247740693977_678_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e08efef5 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451247740693977_678_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,154 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 31))) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + uint counter0 = 0; + while 
((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 46))) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if ((WaveGetLaneIndex() >= 37)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 43)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 240 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4944, 2, 2097168, 4944, 2, 2097168, 4944, 2, 2097168, 4960, 2, 2097168, 4960, 2, 2097168, 4960, 2, 2097168, 6928, 0, 1431650304, 6928, 0, 1431650304, 6928, 0, 1431650304, 6928, 0, 1431650304, 6928, 0, 1431650304, 6928, 0, 1431650304, 6928, 0, 1431650304, 6928, 0, 1431650304, 6928, 0, 1431650304, 6944, 0, 1431650304, 6944, 0, 1431650304, 6944, 0, 1431650304, 6944, 0, 1431650304, 6944, 0, 1431650304, 6944, 0, 1431650304, 6944, 0, 1431650304, 6944, 0, 1431650304, 6944, 0, 1431650304, 7504, 349525, 1431650304, 7504, 349525, 1431650304, 7504, 349525, 1431650304, 7504, 349525, 1431650304, 7504, 349525, 1431650304, 7504, 349525, 1431650304, 7504, 349525, 1431650304, 7504, 349525, 1431650304, 7504, 349525, 1431650304, 7504, 349525, 1431650304, 7504, 349525, 1431650304, 7504, 349525, 1431650304, 7504, 349525, 1431650304, 7504, 349525, 1431650304, 7504, 349525, 1431650304, 7504, 349525, 1431650304, 7504, 349525, 1431650304, 7504, 349525, 1431650304, 7504, 349525, 1431650304, 7520, 349525, 1431650304, 7520, 349525, 1431650304, 7520, 349525, 1431650304, 7520, 349525, 1431650304, 7520, 349525, 1431650304, 7520, 349525, 1431650304, 7520, 349525, 1431650304, 7520, 349525, 1431650304, 7520, 349525, 1431650304, 7520, 349525, 1431650304, 7520, 349525, 
1431650304, 7520, 349525, 1431650304, 7520, 349525, 1431650304, 7520, 349525, 1431650304, 7520, 349525, 1431650304, 7520, 349525, 1431650304, 7520, 349525, 1431650304, 7520, 349525, 1431650304, 7520, 349525, 1431650304, 8208, 1, 4160749568, 8208, 1, 4160749568, 8208, 1, 4160749568, 8208, 1, 4160749568, 8208, 1, 4160749568, 8208, 1, 4160749568, 8224, 1, 4160749568, 8224, 1, 4160749568, 8224, 1, 4160749568, 8224, 1, 4160749568, 8224, 1, 4160749568, 8224, 1, 4160749568, 9168, 2, 1052672, 9168, 2, 1052672, 9168, 2, 1052672, 9184, 2, 1052672, 9184, 2, 1052672, 9184, 2, 1052672] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451252841957919_679_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451252841957919_679_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b3182e5c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451252841957919_679_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +/* Each lane that reaches a wave op reserves three consecutive words of _participant_bit by adding 3 to _wave_op_index[0], then stores (op id << 6), ballot.x and ballot.y in them. */ [numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { /* even lanes below 8, i.e. lanes 0, 2, 4 and 6 (ballot.x == 85) */ + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { /* never true here: case 1 only holds lanes with (lane % 2) == 1 */ + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451253090792333_680_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451253090792333_680_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d35f4862 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451253090792333_680_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,118 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +/* Each lane that reaches a wave op reserves three consecutive words of _participant_bit by adding 3 to _wave_op_index[0], then stores (op id << 6), ballot.x and ballot.y in them. */ [numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { /* lanes 0 and 4 (ballot.x == 17) */ + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { /* never true here: lanes with (lane % 4) == 1 are odd */ + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { /* all 16 lanes with (lane % 4) == 2 take part */ + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 55))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 60))) { /* lanes 3 and 63 */ + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 54))) { /* lanes 3, 7, 11, 55, 59 and 63 */ + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 2624, 8, 2147483648, 2624, 8, 2147483648, 3328, 2184, 2290089984, 3328, 2184, 2290089984, 3328, 2184, 2290089984, 3328, 2184, 2290089984, 3328, 2184, 2290089984, 3328, 2184, 2290089984] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern 
+ GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451268268419429_682_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451268268419429_682_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f763710d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451268268419429_682_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +/* Each lane that reaches a wave op reserves three consecutive words of _participant_bit by adding 3 to _wave_op_index[0], then stores a tag built from (op id << 6) and the enclosing loop counters, followed by ballot.x and ballot.y. */ [numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 4)) { /* only lanes 0-3 execute anything below */ + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); /* incremented before use, so recorded tags carry counter0 = 1, 2, 3 */ + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 57)) { /* never true for lanes 0-3 */ + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((26 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 58))) { + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 39))) { /* never true: only lane 3 can be inside the enclosing branch */ + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (counter0 << 4)) | (i1 << 2)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 4)) { /* never true for lanes 0-3 */ + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((78 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 61))) { /* lane 1 only (ballot.x == 2), once per while iteration */ + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((96 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6160, 2, 0, 6176, 2, 0, 6192, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451270410972731_684_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451270410972731_684_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3f4313ec --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451270410972731_684_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,97 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +/* Each lane that reaches a wave op reserves three consecutive words of _participant_bit by adding 3 to _wave_op_index[0], then stores (op id << 6) | (i0 << 4), ballot.x and ballot.y in them. */ [numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 17)) { /* lanes 0-16 (ballot.x == 131071) */ + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 51)) { + if ((WaveGetLaneIndex() >= 47)) { /* always true inside the (lane >= 51) branch */ + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 49)) { /* always true inside the (lane >= 51) branch */ + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 42)) { /* lanes 42-63 */ + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 585 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 131071, 0, 896, 131071, 0, 896, 131071, 0, 896, 131071, 0, 896, 131071, 0, 896, 131071, 0, 896, 131071, 0, 896, 131071, 0, 896, 131071, 0, 896, 131071, 0, 896, 131071, 0, 896, 131071, 0, 896, 131071, 0, 896, 131071, 0, 896, 131071, 0, 896, 131071, 0, 896, 131071, 0, 912, 131071, 0, 912, 131071, 0, 912, 131071, 0, 912, 131071, 0, 912, 131071, 0, 912, 131071, 0, 912, 131071, 0, 912, 131071, 0, 912, 131071, 0, 912, 131071, 0, 912, 131071, 0, 912, 131071, 0, 912, 131071, 0, 912, 131071, 0, 912, 131071, 0, 912, 131071, 0, 912, 131071, 0, 928, 131071, 0, 928, 131071, 0, 928, 131071, 0, 928, 131071, 0, 928, 131071, 0, 928, 131071, 0, 928, 131071, 0, 928, 131071, 0, 928, 131071, 0, 928, 131071, 0, 928, 131071, 0, 928, 131071, 0, 928, 131071, 0, 928, 131071, 0, 928, 131071, 0, 928, 131071, 0, 928, 131071, 0, 1536, 0, 4294443008, 1536, 0, 4294443008, 1536, 0, 4294443008, 1536, 0, 4294443008, 1536, 0, 4294443008, 1536, 0, 4294443008, 1536, 0, 4294443008, 1536, 0, 4294443008, 1536, 0, 4294443008, 1536, 0, 4294443008, 1536, 0, 4294443008, 1536, 0, 4294443008, 1536, 0, 4294443008, 1552, 0, 4294443008, 1552, 0, 4294443008, 1552, 0, 4294443008, 1552, 0, 4294443008, 1552, 0, 4294443008, 1552, 0, 4294443008, 1552, 0, 4294443008, 1552, 0, 4294443008, 1552, 0, 4294443008, 1552, 0, 4294443008, 1552, 0, 4294443008, 
1552, 0, 4294443008, 1552, 0, 4294443008, 1568, 0, 4294443008, 1568, 0, 4294443008, 1568, 0, 4294443008, 1568, 0, 4294443008, 1568, 0, 4294443008, 1568, 0, 4294443008, 1568, 0, 4294443008, 1568, 0, 4294443008, 1568, 0, 4294443008, 1568, 0, 4294443008, 1568, 0, 4294443008, 1568, 0, 4294443008, 1568, 0, 4294443008, 1984, 0, 4294443008, 1984, 0, 4294443008, 1984, 0, 4294443008, 1984, 0, 4294443008, 1984, 0, 4294443008, 1984, 0, 4294443008, 1984, 0, 4294443008, 1984, 0, 4294443008, 1984, 0, 4294443008, 1984, 0, 4294443008, 1984, 0, 4294443008, 1984, 0, 4294443008, 1984, 0, 4294443008, 2000, 0, 4294443008, 2000, 0, 4294443008, 2000, 0, 4294443008, 2000, 0, 4294443008, 2000, 0, 4294443008, 2000, 0, 4294443008, 2000, 0, 4294443008, 2000, 0, 4294443008, 2000, 0, 4294443008, 2000, 0, 4294443008, 2000, 0, 4294443008, 2000, 0, 4294443008, 2000, 0, 4294443008, 2016, 0, 4294443008, 2016, 0, 4294443008, 2016, 0, 4294443008, 2016, 0, 4294443008, 2016, 0, 4294443008, 2016, 0, 4294443008, 2016, 0, 4294443008, 2016, 0, 4294443008, 2016, 0, 4294443008, 2016, 0, 4294443008, 2016, 0, 4294443008, 2016, 0, 4294443008, 2016, 0, 4294443008, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2432, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 
2448, 0, 4294966272, 2448, 0, 4294966272, 2448, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272, 2464, 0, 4294966272] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451273722888601_685_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451273722888601_685_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b3182e5c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451273722888601_685_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +/* Each lane that reaches a wave op reserves three consecutive words of _participant_bit by adding 3 to _wave_op_index[0], then stores (op id << 6), ballot.x and ballot.y in them. */ [numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { /* even lanes below 8, i.e. lanes 0, 2, 4 and 6 (ballot.x == 85) */ + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { /* never true here: case 1 only holds lanes with (lane % 2) == 1 */ + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451282841991265_687_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451282841991265_687_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6e0e1e08 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451282841991265_687_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,197 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +/* Each lane that reaches a wave op reserves three consecutive words of _participant_bit by adding 3 to _wave_op_index[0], then stores a tag built from (op id << 6) and any enclosing loop counter, followed by ballot.x and ballot.y. */ [numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 52))) { /* lanes 11, 21, 43 and 52; every other lane takes the else branch */ + if (((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 60))) { /* never true: lanes 22 and 60 are not in this branch */ + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() & 1) == 1)) { /* never true inside the even-lane branch */ + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 21)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 56)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); /* incremented before use, so recorded tags carry counter1 = 1, 2 */ + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 12))) { + if ((((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { /* not reachable: (lane % 4) is always 0-3 and all four values have a case above */ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 111 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + 
Data: [3008, 0, 1048576, 3024, 0, 1048576, 4160, 2048, 0, 4176, 2048, 0, 6080, 16, 0, 6720, 17, 0, 6720, 17, 0, 7888, 139810, 570425344, 7888, 139810, 570425344, 7888, 139810, 570425344, 7888, 139810, 570425344, 7888, 139810, 570425344, 7888, 139810, 570425344, 7888, 139810, 570425344, 7904, 139810, 570425344, 7904, 139810, 570425344, 7904, 139810, 570425344, 7904, 139810, 570425344, 7904, 139810, 570425344, 7904, 139810, 570425344, 7904, 139810, 570425344, 8192, 1145324612, 1145324612, 8192, 1145324612, 1145324612, 8192, 1145324612, 1145324612, 8192, 1145324612, 1145324612, 8192, 1145324612, 1145324612, 8192, 1145324612, 1145324612, 8192, 1145324612, 1145324612, 8192, 1145324612, 1145324612, 8192, 1145324612, 1145324612, 8192, 1145324612, 1145324612, 8192, 1145324612, 1145324612, 8192, 1145324612, 1145324612, 8192, 1145324612, 1145324612, 8192, 1145324612, 1145324612, 8192, 1145324612, 1145324612, 8192, 1145324612, 1145324612] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451284247941382_688_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451284247941382_688_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..66878010 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451284247941382_688_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,180 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +/* Each lane that reaches a wave op reserves three consecutive words of _participant_bit by adding 3 to _wave_op_index[0], then stores a tag built from (op id << 6) and any enclosing loop counter, followed by ballot.x and ballot.y. */ [numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { /* lanes 0, 3 and 6 (ballot.x == 73) */ + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 49))) { /* of these, only lanes 19 and 49 satisfy (lane % 3) == 1 */ + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { /* lane 19 is the only lane that gets here (ballot.x == 524288) */ + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 61))) { /* of these, only lane 32 satisfies (lane % 3) == 2 */ + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 4736, 524288, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451492026440353_692_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451492026440353_692_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f2fa0da9 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451492026440353_692_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,296 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 54))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + 
result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 63))) { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 37)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((223 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((237 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | 
(i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((257 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 624 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3008, 134251016, 2181570690, 3008, 134251016, 2181570690, 3008, 134251016, 2181570690, 3008, 134251016, 2181570690, 3008, 134251016, 2181570690, 3008, 134251016, 2181570690, 3008, 134251016, 2181570690, 3008, 134251016, 2181570690, 3008, 134251016, 2181570690, 3008, 134251016, 2181570690, 3648, 73, 0, 3648, 73, 0, 3648, 73, 0, 5376, 134251016, 2181570690, 5376, 134251016, 2181570690, 5376, 134251016, 2181570690, 5376, 134251016, 2181570690, 5376, 134251016, 2181570690, 5376, 134251016, 2181570690, 5376, 134251016, 2181570690, 5376, 134251016, 2181570690, 5376, 134251016, 2181570690, 5376, 134251016, 2181570690, 5952, 272696336, 68174084, 5952, 272696336, 68174084, 5952, 272696336, 68174084, 5952, 272696336, 68174084, 5952, 272696336, 68174084, 5952, 272696336, 68174084, 5952, 272696336, 68174084, 5952, 272696336, 68174084, 5952, 272696336, 68174084, 5952, 272696336, 68174084, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 
6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 6272, 3067833782, 1840700269, 7616, 85, 0, 7616, 85, 0, 7616, 85, 0, 7616, 85, 0, 8576, 85, 0, 8576, 85, 0, 8576, 85, 0, 8576, 85, 0, 8592, 85, 0, 8592, 85, 0, 8592, 85, 0, 8592, 85, 0, 8608, 85, 0, 8608, 85, 0, 8608, 85, 0, 8608, 85, 0, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 1431655765, 11328, 1431655680, 
1431655765, 11328, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11344, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 11360, 1431655680, 1431655765, 13056, 256, 0, 14272, 682, 2862612480, 14272, 682, 2862612480, 14272, 682, 2862612480, 14272, 682, 2862612480, 14272, 682, 2862612480, 14272, 682, 2862612480, 14272, 682, 2862612480, 14272, 682, 2862612480, 14272, 682, 
2862612480, 14272, 682, 2862612480, 14272, 682, 2862612480, 14288, 682, 2862612480, 14288, 682, 2862612480, 14288, 682, 2862612480, 14288, 682, 2862612480, 14288, 682, 2862612480, 14288, 682, 2862612480, 14288, 682, 2862612480, 14288, 682, 2862612480, 14288, 682, 2862612480, 14288, 682, 2862612480, 14288, 682, 2862612480, 16448, 42, 2684354560, 16448, 42, 2684354560, 16448, 42, 2684354560, 16448, 42, 2684354560, 16448, 42, 2684354560, 16464, 42, 2684354560, 16464, 42, 2684354560, 16464, 42, 2684354560, 16464, 42, 2684354560, 16464, 42, 2684354560] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451553702242586_696_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451553702242586_696_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c6df3c29 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451553702242586_696_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,119 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 24 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0, 1792, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451554100256042_697_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451554100256042_697_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..84fc05a3 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451554100256042_697_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 32)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 32)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 135 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 2496, 0, 4294967295, 1856, 544, 0, 1856, 544, 0, 1600, 7647, 0, 1600, 7647, 0, 1600, 7647, 0, 1600, 7647, 0, 1600, 7647, 0, 1600, 7647, 0, 1600, 7647, 0, 1600, 7647, 0, 1600, 7647, 0, 1600, 7647, 0, 1600, 7647, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451554483426892_698_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451554483426892_698_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b09558e4 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451554483426892_698_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,291 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 33)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 40)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 
55))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 39)) { + if ((WaveGetLaneIndex() >= 54)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 33)) { + if ((WaveGetLaneIndex() >= 35)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((140 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 25)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((171 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 59)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((231 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 30))) { + result = (result + 
WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((252 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 477 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1984, 268501008, 1048832, 1984, 268501008, 1048832, 1984, 268501008, 1048832, 1984, 268501008, 1048832, 1984, 268501008, 1048832, 3200, 0, 16777216, 3216, 0, 16777216, 4544, 0, 1145044992, 4544, 0, 1145044992, 4544, 0, 1145044992, 5712, 0, 1145307136, 5712, 0, 1145307136, 5712, 0, 1145307136, 5712, 0, 1145307136, 5728, 0, 1145307136, 5728, 0, 1145307136, 5728, 0, 1145307136, 5728, 0, 1145307136, 6544, 0, 1145307136, 6544, 0, 1145307136, 6544, 0, 1145307136, 6544, 0, 1145307136, 6560, 0, 1145307136, 6560, 0, 1145307136, 6560, 0, 1145307136, 6560, 0, 1145307136, 6976, 559240, 0, 6976, 559240, 0, 6976, 559240, 0, 6976, 559240, 0, 6976, 559240, 0, 8208, 136348168, 2181570690, 8208, 136348168, 2181570690, 8208, 136348168, 2181570690, 8208, 136348168, 2181570690, 8208, 136348168, 2181570690, 8208, 136348168, 2181570690, 8208, 136348168, 2181570690, 8208, 136348168, 2181570690, 8208, 136348168, 2181570690, 8208, 136348168, 2181570690, 8208, 136348168, 2181570690, 8224, 136348168, 2181570690, 8224, 136348168, 2181570690, 8224, 136348168, 2181570690, 8224, 136348168, 2181570690, 8224, 136348168, 2181570690, 8224, 136348168, 2181570690, 8224, 136348168, 2181570690, 8224, 136348168, 2181570690, 8224, 136348168, 2181570690, 8224, 136348168, 2181570690, 8224, 136348168, 2181570690, 8240, 136348168, 2181570690, 8240, 136348168, 2181570690, 8240, 
136348168, 2181570690, 8240, 136348168, 2181570690, 8240, 136348168, 2181570690, 8240, 136348168, 2181570690, 8240, 136348168, 2181570690, 8240, 136348168, 2181570690, 8240, 136348168, 2181570690, 8240, 136348168, 2181570690, 8240, 136348168, 2181570690, 8976, 0, 2454267024, 8976, 0, 2454267024, 8976, 0, 2454267024, 8976, 0, 2454267024, 8976, 0, 2454267024, 8976, 0, 2454267024, 8976, 0, 2454267024, 8976, 0, 2454267024, 8976, 0, 2454267024, 8976, 0, 2454267024, 8992, 0, 2454267024, 8992, 0, 2454267024, 8992, 0, 2454267024, 8992, 0, 2454267024, 8992, 0, 2454267024, 8992, 0, 2454267024, 8992, 0, 2454267024, 8992, 0, 2454267024, 8992, 0, 2454267024, 8992, 0, 2454267024, 9008, 0, 2454267024, 9008, 0, 2454267024, 9008, 0, 2454267024, 9008, 0, 2454267024, 9008, 0, 2454267024, 9008, 0, 2454267024, 9008, 0, 2454267024, 9008, 0, 2454267024, 9008, 0, 2454267024, 9008, 0, 2454267024, 10960, 136348168, 2181570690, 10960, 136348168, 2181570690, 10960, 136348168, 2181570690, 10960, 136348168, 2181570690, 10960, 136348168, 2181570690, 10960, 136348168, 2181570690, 10960, 136348168, 2181570690, 10960, 136348168, 2181570690, 10960, 136348168, 2181570690, 10960, 136348168, 2181570690, 10960, 136348168, 2181570690, 10976, 136348168, 2181570690, 10976, 136348168, 2181570690, 10976, 136348168, 2181570690, 10976, 136348168, 2181570690, 10976, 136348168, 2181570690, 10976, 136348168, 2181570690, 10976, 136348168, 2181570690, 10976, 136348168, 2181570690, 10976, 136348168, 2181570690, 10976, 136348168, 2181570690, 10976, 136348168, 2181570690, 10992, 136348168, 2181570690, 10992, 136348168, 2181570690, 10992, 136348168, 2181570690, 10992, 136348168, 2181570690, 10992, 136348168, 2181570690, 10992, 136348168, 2181570690, 10992, 136348168, 2181570690, 10992, 136348168, 2181570690, 10992, 136348168, 2181570690, 10992, 136348168, 2181570690, 10992, 136348168, 2181570690, 11520, 272696336, 68174084, 11520, 272696336, 68174084, 11520, 272696336, 68174084, 11520, 272696336, 68174084, 11520, 
272696336, 68174084, 11520, 272696336, 68174084, 11520, 272696336, 68174084, 11520, 272696336, 68174084, 11520, 272696336, 68174084, 11520, 272696336, 68174084, 13456, 536870912, 0, 13472, 536870912, 0, 14352, 545392672, 136348168, 14352, 545392672, 136348168, 14352, 545392672, 136348168, 14352, 545392672, 136348168, 14352, 545392672, 136348168, 14352, 545392672, 136348168, 14352, 545392672, 136348168, 14352, 545392672, 136348168, 14352, 545392672, 136348168, 14352, 545392672, 136348168, 14368, 545392672, 136348168, 14368, 545392672, 136348168, 14368, 545392672, 136348168, 14368, 545392672, 136348168, 14368, 545392672, 136348168, 14368, 545392672, 136348168, 14368, 545392672, 136348168, 14368, 545392672, 136348168, 14368, 545392672, 136348168, 14368, 545392672, 136348168] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451692554388197_701_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451692554388197_701_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5c1d6793 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451692554388197_701_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,232 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Every wave op below is followed by a 3-uint record written by each active lane: slot temp is reserved with InterlockedAdd(_wave_op_index[0], 3, temp); [temp] = (op id << 6), OR-ed with (loop counter << 4) inside loops; [temp + 1] and [temp + 2] = .x and .y of WaveActiveBallot(1). result only feeds later wave ops and is never stored. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } /* no break: lanes from case 0 continue into case 1 */ + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } /* no break: lanes from cases 0 and 1 continue into case 2 */ + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { /* lane % 3 is always 0, 1 or 2, so no lane takes this branch */ + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; +
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 63)) { /* lane 63 is not in this case (63 % 4 == 3), so the loop below never runs */ + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 32)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { /* lanes in this case are odd (lane % 4 == 1), so this is never taken */ + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 36))) { + if ((((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 46))) { /* lanes 21, 63 and 46 all failed the enclosing test, so this is never taken */ + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x;
+ _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 37)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 60))) { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((157 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; +
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 441 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472,
4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 3968, 1145324612, 1145324612, 4416, 559240, 0, 4416, 559240, 0, 4416, 559240, 0, 4416, 559240, 0, 4416, 559240, 0, 8384, 0, 16, 10048, 146, 536870912, 10048, 146, 536870912, 10048, 146, 536870912, 10048, 146, 536870912, 10064, 146, 536870912, 10064, 146, 536870912, 10064, 146, 536870912, 10064, 146, 536870912, 10752, 146, 536870912, 10752, 146, 536870912, 10752, 146, 536870912, 10752, 146, 536870912, 10768, 146, 536870912, 10768, 146, 536870912, 10768, 146, 536870912, 10768, 146, 536870912, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 
613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513, 11072, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451694190909978_702_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451694190909978_702_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6575eb27 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451694190909978_702_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,301 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Every wave op below is followed by a 3-uint record written by each active lane: slot temp is reserved with InterlockedAdd(_wave_op_index[0], 3, temp); [temp] = (op id << 6), OR-ed with (outer loop counter << 4) and (inner loop counter << 2) inside loops; [temp + 1] and [temp + 2] = .x and .y of WaveActiveBallot(1). result only feeds later wave ops and is never stored. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) |
(counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; /* last statement of the loop body, so this continue changes nothing */ + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 56))) { /* neither lane reaches this case (7 % 4 == 3, 56 % 3 == 2), so nothing inside this if runs */ + if (((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 31)) ||
(WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 56)) { + if ((WaveGetLaneIndex() >= 50)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 59)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter3 == 1)) { + break; /* counter3 is 1 on the first pass, so the while loop runs exactly once */ + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { /* lanes in this case are odd (lane % 4 == 1), so this is never taken */ + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 29) ||
(WaveGetLaneIndex() == 52))) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 48)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((237 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((264 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 2)) { + counter6 = (counter6 + 1); + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((286 << 6) | (i5 << 4)) | (counter6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; +
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((297 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 138 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3344, 16781313, 0, 3344, 16781313, 0, 3344, 16781313, 0, 3348, 16781313, 0, 3348, 16781313, 0, 3348, 16781313, 0, 3352, 16781313, 0, 3352, 16781313, 0, 3352, 16781313, 0, 3360, 16781313, 0, 3360, 16781313, 0, 3360, 16781313, 0, 3364, 16781313, 0, 3364, 16781313, 0, 3364, 16781313, 0, 3368, 16781313, 0, 3368, 16781313, 0, 3368, 16781313, 0, 3792, 4097, 0, 3792, 4097, 0, 3796, 4097, 0, 3796, 4097, 0, 3800, 4097, 0, 3800, 4097, 0, 3808, 4097, 0, 3808, 4097, 0, 3812, 4097, 0, 3812, 4097, 0, 3816, 4097, 0, 3816, 4097, 0, 10640, 0, 16777216, 18308, 0, 67108864, 18312, 0, 67108864, 18324, 0, 67108864, 18328, 0, 67108864, 19328, 67125252, 1074004032, 19328, 67125252, 1074004032, 19328, 67125252, 1074004032, 19328, 67125252, 1074004032, 19328, 67125252, 1074004032, 19328, 67125252, 1074004032, 19776,
559240, 0, 19776, 559240, 0, 19776, 559240, 0, 19776, 559240, 0, 19776, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756451700905785498_703_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756451700905785498_703_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4703f58a --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756451700905785498_703_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,257 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Every wave op below is followed by a 3-uint record written by each active lane: slot temp is reserved with InterlockedAdd(_wave_op_index[0], 3, temp); [temp] = (op id << 6), OR-ed with (loop counter << 4) inside loops; [temp + 1] and [temp + 2] = .x and .y of WaveActiveBallot(1). result only feeds later wave ops and is never stored. */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 40))) { /* of these only lane 9 is odd, so only lane 9 enters from case 1 */ + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; +
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 57)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 12)) { /* lanes in this case are odd, so this is never taken */ + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 42)) { /* lanes in this case are odd, so this is never taken */ + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { /* contradicts the enclosing even-lane test, so this is never taken */ + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot =
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((113 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } /* no break: lanes from case 0 continue into case 1 */ + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((156 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp);
+ _participant_bit[temp] = ((175 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; /* counter3 is 1 on the first pass, so the while loop runs exactly once */ + } + } + } /* no break: lanes from cases 0 and 1 continue into case 2 */ + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((208 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 468 + - Name:
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1984, 512, 0, 4688, 174250, 2863267840, 4688, 174250, 2863267840, 4688, 174250, 2863267840, 4688, 174250, 2863267840, 4688, 174250, 2863267840, 4688, 174250, 2863267840, 4688, 174250, 2863267840, 4688, 174250, 2863267840, 4688, 174250, 2863267840, 4688, 174250, 2863267840, 4688, 174250, 2863267840, 4688, 174250, 2863267840, 4688, 174250, 2863267840, 4688, 174250, 2863267840, 4688, 174250, 2863267840, 4688, 174250, 2863267840, 4704, 174250, 2863267840, 4704, 174250, 2863267840, 4704, 174250, 2863267840, 4704, 174250, 2863267840, 4704, 174250, 2863267840, 4704, 174250, 2863267840, 4704, 174250, 2863267840, 4704, 174250, 2863267840, 4704, 174250, 2863267840, 4704, 174250, 2863267840, 4704, 174250, 2863267840, 4704, 174250, 2863267840, 4704, 174250, 2863267840, 4704, 174250, 2863267840, 4704, 174250, 2863267840, 4704, 174250, 2863267840, 4720, 174250, 2863267840, 4720, 174250, 2863267840, 4720, 174250, 2863267840, 4720, 174250, 2863267840, 4720, 174250, 2863267840, 4720, 174250, 2863267840, 4720, 174250, 2863267840, 4720, 174250, 2863267840, 4720, 174250, 2863267840, 4720, 174250, 2863267840, 4720, 174250, 2863267840, 4720, 174250, 2863267840, 4720, 174250, 2863267840, 4720, 174250, 2863267840, 4720, 174250, 2863267840, 4720, 174250, 2863267840, 7248, 1090785345, 272696336, 7248, 1090785345, 272696336, 7248, 1090785345, 272696336, 7248, 1090785345, 272696336, 7248, 1090785345, 272696336, 7248, 1090785345, 272696336, 7248, 1090785345, 272696336, 7248, 1090785345, 272696336, 7248, 1090785345, 272696336, 7248, 1090785345, 272696336, 7248, 1090785345, 272696336, 7264, 1090785345, 272696336, 7264, 1090785345, 272696336, 7264, 1090785345, 272696336, 7264, 1090785345, 272696336, 7264, 1090785345, 272696336, 7264, 1090785345, 272696336, 7264, 1090785345, 272696336, 7264, 1090785345, 272696336, 7264, 1090785345, 272696336, 7264, 1090785345, 272696336, 7264, 1090785345, 
272696336, 8912, 4198400, 32, 8912, 4198400, 32, 8912, 4198400, 32, 11216, 134218752, 32, 11216, 134218752, 32, 11216, 134218752, 32, 12752, 1090785345, 272696336, 12752, 1090785345, 272696336, 12752, 1090785345, 272696336, 12752, 1090785345, 272696336, 12752, 1090785345, 272696336, 12752, 1090785345, 272696336, 12752, 1090785345, 272696336, 12752, 1090785345, 272696336, 12752, 1090785345, 272696336, 12752, 1090785345, 272696336, 12752, 1090785345, 272696336, 12768, 1090785345, 272696336, 12768, 1090785345, 272696336, 12768, 1090785345, 272696336, 12768, 1090785345, 272696336, 12768, 1090785345, 272696336, 12768, 1090785345, 272696336, 12768, 1090785345, 272696336, 12768, 1090785345, 272696336, 12768, 1090785345, 272696336, 12768, 1090785345, 272696336, 12768, 1090785345, 272696336, 13328, 136348168, 2181570690, 13328, 136348168, 2181570690, 13328, 136348168, 2181570690, 13328, 136348168, 2181570690, 13328, 136348168, 2181570690, 13328, 136348168, 2181570690, 13328, 136348168, 2181570690, 13328, 136348168, 2181570690, 13328, 136348168, 2181570690, 13328, 136348168, 2181570690, 13328, 136348168, 2181570690, 13344, 136348168, 2181570690, 13344, 136348168, 2181570690, 13344, 136348168, 2181570690, 13344, 136348168, 2181570690, 13344, 136348168, 2181570690, 13344, 136348168, 2181570690, 13344, 136348168, 2181570690, 13344, 136348168, 2181570690, 13344, 136348168, 2181570690, 13344, 136348168, 2181570690, 13344, 136348168, 2181570690, 13888, 272696336, 68174084, 13888, 272696336, 68174084, 13888, 272696336, 68174084, 13888, 272696336, 68174084, 13888, 272696336, 68174084, 13888, 272696336, 68174084, 13888, 272696336, 68174084, 13888, 272696336, 68174084, 13888, 272696336, 68174084, 13888, 272696336, 68174084, 14208, 613566756, 1227133513, 14208, 613566756, 1227133513, 14208, 613566756, 1227133513, 14208, 613566756, 1227133513, 14208, 613566756, 1227133513, 14208, 613566756, 1227133513, 14208, 613566756, 1227133513, 14208, 613566756, 1227133513, 14208, 613566756, 
1227133513, 14208, 613566756, 1227133513, 14208, 613566756, 1227133513, 14208, 613566756, 1227133513, 14208, 613566756, 1227133513, 14208, 613566756, 1227133513, 14208, 613566756, 1227133513, 14208, 613566756, 1227133513, 14208, 613566756, 1227133513, 14208, 613566756, 1227133513, 14208, 613566756, 1227133513, 14208, 613566756, 1227133513, 14208, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756452889980142447_705_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756452889980142447_705_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b0dc655f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756452889980142447_705_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,123 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 46))) { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 8, 0, 2368, 8322, 545259520, 2368, 8322, 545259520, 2368, 8322, 545259520, 2368, 8322, 545259520, 2368, 8322, 545259520, 3072, 8322, 545390592, 3072, 
8322, 545390592, 3072, 8322, 545390592, 3072, 8322, 545390592, 3072, 8322, 545390592, 3072, 8322, 545390592, 3392, 545392672, 136348168, 3392, 545392672, 136348168, 3392, 545392672, 136348168, 3392, 545392672, 136348168, 3392, 545392672, 136348168, 3392, 545392672, 136348168, 3392, 545392672, 136348168, 3392, 545392672, 136348168, 3392, 545392672, 136348168, 3392, 545392672, 136348168] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756452890410173834_706_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756452890410173834_706_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e16e028f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756452890410173834_706_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,108 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 9)) { + if ((WaveGetLaneIndex() == 57)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 49))) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 13)) { + if ((WaveGetLaneIndex() >= 61)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 43)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1728, 512, 0, 3072, 512, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - 
Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756452890609338614_707_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756452890609338614_707_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9ae50f96 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756452890609338614_707_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,257 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 
= 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((72 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 26)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((82 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((97 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + 
WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 57))) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((168 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() < 18)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 2)) { + break; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() >= 60)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (242 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 327 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2496, 2097161, 33554432, 2496, 2097161, 33554432, 2496, 2097161, 33554432, 2496, 2097161, 33554432, 2112, 8390656, 2621440, 2112, 8390656, 2621440, 2112, 8390656, 2621440, 2112, 8390656, 2621440, 4624, 0, 268435456, 4628, 0, 268435456, 4640, 0, 268435456, 4644, 0, 268435456, 5264, 4369, 0, 5264, 4369, 0, 5264, 4369, 0, 5264, 4369, 0, 5268, 4369, 0, 5268, 4369, 0, 5268, 4369, 0, 5268, 4369, 0, 5280, 4369, 
0, 5280, 4369, 0, 5280, 4369, 0, 5280, 4369, 0, 5284, 4369, 0, 5284, 4369, 0, 5284, 4369, 0, 5284, 4369, 0, 6224, 4096, 0, 6228, 4096, 0, 6240, 4096, 0, 6244, 4096, 0, 7296, 1145324612, 1145324612, 7296, 1145324612, 1145324612, 7296, 1145324612, 1145324612, 7296, 1145324612, 1145324612, 7296, 1145324612, 1145324612, 7296, 1145324612, 1145324612, 7296, 1145324612, 1145324612, 7296, 1145324612, 1145324612, 7296, 1145324612, 1145324612, 7296, 1145324612, 1145324612, 7296, 1145324612, 1145324612, 7296, 1145324612, 1145324612, 7296, 1145324612, 1145324612, 7296, 1145324612, 1145324612, 7296, 1145324612, 1145324612, 7296, 1145324612, 1145324612, 7744, 559240, 0, 7744, 559240, 0, 7744, 559240, 0, 7744, 559240, 0, 7744, 559240, 0, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 11520, 1431590229, 1431639380, 12480, 21845, 0, 12480, 21845, 0, 12480, 21845, 0, 12480, 21845, 0, 12480, 21845, 0, 12480, 21845, 0, 12480, 21845, 0, 12480, 21845, 0, 12496, 21845, 0, 12496, 21845, 0, 12496, 21845, 0, 12496, 21845, 0, 12496, 21845, 0, 12496, 21845, 0, 12496, 21845, 0, 12496, 21845, 0, 12512, 21845, 0, 12512, 21845, 0, 12512, 21845, 0, 12512, 21845, 0, 12512, 21845, 0, 12512, 
21845, 0, 12512, 21845, 0, 12512, 21845, 0, 14016, 0, 2147483648, 14032, 0, 2147483648, 14048, 0, 2147483648] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756452920395636353_708_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756452920395636353_708_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d60e0171 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756452920395636353_708_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,133 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((53 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 31))) { + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- 
+Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2112, 17476, 0, 2112, 17476, 0, 2112, 17476, 0, 2112, 17476, 0, 2128, 17476, 0, 2128, 17476, 0, 2128, 17476, 0, 2128, 17476, 0, 4100, 1073741824, 0, 4104, 1073741824, 0, 4108, 1073741824, 0, 4116, 1073741824, 0, 4120, 1073741824, 0, 4124, 1073741824, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756452921922753048_709_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756452921922753048_709_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ef5ab24e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756452921922753048_709_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,194 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 
40)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 16))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 43))) { + 
result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 10))) { + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 31))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 318 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2432, 16842752, 8388608, 2432, 16842752, 8388608, 2432, 
16842752, 8388608, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 2048, 65535, 4261412864, 1792, 2863267840, 2796202, 1792, 2863267840, 2796202, 1792, 2863267840, 2796202, 1792, 2863267840, 2796202, 1792, 2863267840, 2796202, 1792, 2863267840, 2796202, 1792, 2863267840, 2796202, 1792, 2863267840, 2796202, 1792, 2863267840, 2796202, 1792, 2863267840, 2796202, 1792, 2863267840, 2796202, 1792, 2863267840, 2796202, 1792, 2863267840, 2796202, 1792, 2863267840, 2796202, 1792, 2863267840, 2796202, 1792, 2863267840, 2796202, 1792, 2863267840, 2796202, 1792, 2863267840, 2796202, 1792, 2863267840, 2796202, 5840, 17, 0, 5840, 17, 0, 5856, 17, 0, 5856, 17, 0, 5872, 17, 0, 5872, 17, 0, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 7424, 1145324612, 1145324612, 8640, 136, 2290649088, 8640, 136, 2290649088, 8640, 136, 2290649088, 8640, 136, 2290649088, 8640, 136, 2290649088, 8640, 136, 2290649088, 8640, 136, 2290649088, 8640, 136, 2290649088, 8656, 136, 2290649088, 8656, 136, 2290649088, 8656, 136, 2290649088, 8656, 136, 2290649088, 8656, 136, 2290649088, 8656, 136, 2290649088, 8656, 136, 
2290649088, 8656, 136, 2290649088, 8672, 136, 2290649088, 8672, 136, 2290649088, 8672, 136, 2290649088, 8672, 136, 2290649088, 8672, 136, 2290649088, 8672, 136, 2290649088, 8672, 136, 2290649088, 8672, 136, 2290649088, 12864, 8, 2290614272, 12864, 8, 2290614272, 12864, 8, 2290614272, 12864, 8, 2290614272, 12864, 8, 2290614272, 12880, 8, 2290614272, 12880, 8, 2290614272, 12880, 8, 2290614272, 12880, 8, 2290614272, 12880, 8, 2290614272, 12896, 8, 2290614272, 12896, 8, 2290614272, 12896, 8, 2290614272, 12896, 8, 2290614272, 12896, 8, 2290614272] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756452943566019581_711_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756452943566019581_711_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..41b9f942 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756452943566019581_711_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,214 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 24)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 46)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (26 << 6); + uint4 ballot = WaveActiveBallot(1); 
+ _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 51)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 47)) { + if ((WaveGetLaneIndex() < 22)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 61)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 243 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 
1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2816, 1431655765, 1431655765, 2432, 11184810, 0, 2432, 11184810, 0, 2432, 11184810, 0, 2432, 11184810, 0, 2432, 11184810, 0, 2432, 11184810, 0, 2432, 11184810, 0, 2432, 11184810, 0, 2432, 11184810, 0, 2432, 11184810, 0, 2432, 11184810, 0, 2432, 11184810, 0, 1408, 0, 2863136768, 1408, 0, 2863136768, 1408, 0, 2863136768, 1408, 0, 2863136768, 1408, 0, 2863136768, 1408, 0, 2863136768, 1408, 0, 2863136768, 3456, 73, 0, 3456, 73, 0, 3456, 73, 0, 4032, 272696336, 68174084, 4032, 272696336, 68174084, 4032, 272696336, 68174084, 4032, 272696336, 68174084, 4032, 272696336, 68174084, 4032, 272696336, 68174084, 4032, 272696336, 68174084, 4032, 272696336, 68174084, 4032, 272696336, 68174084, 4032, 272696336, 68174084, 5120, 0, 1073741824, 5824, 32, 0, 8448, 613566756, 4681, 8448, 613566756, 4681, 8448, 613566756, 4681, 8448, 613566756, 4681, 8448, 613566756, 4681, 8448, 613566756, 4681, 8448, 613566756, 4681, 8448, 613566756, 4681, 8448, 613566756, 4681, 8448, 613566756, 4681, 8448, 613566756, 4681, 8448, 613566756, 4681, 8448, 613566756, 4681, 8448, 613566756, 4681, 8448, 613566756, 4681] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - 
Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756452945921583153_712_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756452945921583153_712_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bffb6f69 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756452945921583153_712_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,227 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if 
((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((123 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((132 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((137 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((144 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((155 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { 
+ result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 252 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2576, 0, 1073741824, 2592, 0, 1073741824, 3860, 4, 1145324544, 3860, 4, 1145324544, 3860, 4, 1145324544, 3860, 4, 1145324544, 3860, 4, 1145324544, 3860, 4, 1145324544, 3860, 4, 1145324544, 3864, 4, 1145324544, 3864, 4, 1145324544, 3864, 4, 1145324544, 3864, 4, 1145324544, 3864, 4, 1145324544, 3864, 4, 1145324544, 3864, 4, 1145324544, 3868, 4, 1145324544, 3868, 4, 1145324544, 3868, 4, 1145324544, 3868, 4, 1145324544, 3868, 4, 1145324544, 3868, 4, 1145324544, 3868, 4, 1145324544, 3876, 4, 1145324544, 3876, 4, 1145324544, 3876, 4, 1145324544, 3876, 4, 1145324544, 3876, 4, 1145324544, 3876, 4, 1145324544, 3876, 4, 1145324544, 3880, 4, 1145324544, 3880, 4, 1145324544, 3880, 4, 1145324544, 3880, 4, 1145324544, 3880, 4, 1145324544, 3880, 4, 1145324544, 3880, 4, 1145324544, 3884, 4, 1145324544, 3884, 4, 1145324544, 3884, 4, 1145324544, 3884, 4, 1145324544, 3884, 4, 1145324544, 3884, 4, 1145324544, 3884, 4, 1145324544, 4816, 0, 262144, 4832, 0, 262144, 7232, 524288, 0, 7236, 524288, 0, 7240, 524288, 0, 7248, 524288, 0, 7252, 524288, 0, 7256, 
524288, 0, 9216, 559240, 0, 9216, 559240, 0, 9216, 559240, 0, 9216, 559240, 0, 9216, 559240, 0, 9220, 559240, 0, 9220, 559240, 0, 9220, 559240, 0, 9220, 559240, 0, 9220, 559240, 0, 9224, 559240, 0, 9224, 559240, 0, 9224, 559240, 0, 9224, 559240, 0, 9224, 559240, 0, 9232, 559240, 0, 9232, 559240, 0, 9232, 559240, 0, 9232, 559240, 0, 9232, 559240, 0, 9236, 559240, 0, 9236, 559240, 0, 9236, 559240, 0, 9236, 559240, 0, 9236, 559240, 0, 9240, 559240, 0, 9240, 559240, 0, 9240, 559240, 0, 9240, 559240, 0, 9240, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756452967031432144_713_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756452967031432144_713_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0488e842 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756452967031432144_713_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,265 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 53)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((88 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((93 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 49)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((100 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 50)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 333 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 2112, 17, 0, 2112, 17, 0, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 
1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 3008, 1145324612, 1145324612, 5072, 8, 0, 5076, 8, 0, 5080, 8, 0, 5088, 8, 0, 5092, 8, 0, 5096, 8, 0, 5968, 8390656, 134250504, 5968, 8390656, 134250504, 5968, 8390656, 134250504, 5968, 8390656, 134250504, 5968, 8390656, 134250504, 5972, 8390656, 134250504, 5972, 8390656, 134250504, 5972, 8390656, 134250504, 5972, 8390656, 134250504, 5972, 8390656, 134250504, 5976, 8390656, 134250504, 5976, 8390656, 134250504, 5976, 8390656, 134250504, 5976, 8390656, 134250504, 5976, 8390656, 134250504, 5984, 8390656, 134250504, 5984, 8390656, 134250504, 5984, 8390656, 134250504, 5984, 8390656, 134250504, 5984, 8390656, 134250504, 5988, 8390656, 134250504, 5988, 8390656, 134250504, 5988, 8390656, 134250504, 5988, 8390656, 134250504, 5988, 8390656, 134250504, 5992, 8390656, 134250504, 5992, 8390656, 134250504, 5992, 8390656, 134250504, 5992, 8390656, 134250504, 5992, 8390656, 134250504, 7872, 65, 0, 7872, 65, 0, 8448, 272696336, 68174084, 8448, 272696336, 68174084, 8448, 272696336, 68174084, 8448, 272696336, 68174084, 8448, 272696336, 68174084, 8448, 272696336, 68174084, 8448, 272696336, 68174084, 8448, 272696336, 68174084, 8448, 272696336, 68174084, 8448, 272696336, 68174084, 8768, 68174084, 1090785345, 8768, 68174084, 1090785345, 8768, 68174084, 1090785345, 8768, 68174084, 1090785345, 8768, 68174084, 1090785345, 8768, 68174084, 1090785345, 8768, 68174084, 1090785345, 8768, 68174084, 1090785345, 8768, 68174084, 1090785345, 8768, 68174084, 1090785345, 8768, 68174084, 1090785345] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - 
Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756453124140716310_718_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756453124140716310_718_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7d203938 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756453124140716310_718_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,174 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 36)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 21)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 55))) { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 255 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3392, 538968072, 65792, 3392, 538968072, 65792, 3392, 538968072, 65792, 3392, 538968072, 65792, 3392, 538968072, 65792, 3136, 65536, 134217744, 3136, 65536, 134217744, 3136, 65536, 134217744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2880, 0, 4160683744, 2496, 2031607, 0, 2496, 2031607, 0, 2496, 2031607, 0, 2496, 2031607, 0, 2496, 2031607, 0, 2496, 2031607, 0, 2496, 2031607, 0, 2496, 2031607, 0, 2496, 
2031607, 0, 2496, 2031607, 0, 2496, 2031607, 0, 2496, 2031607, 0, 2496, 2031607, 0, 2496, 2031607, 0, 2496, 2031607, 0, 2496, 2031607, 0, 2496, 2031607, 0, 2496, 2031607, 0, 2496, 2031607, 0, 4224, 73, 0, 4224, 73, 0, 4224, 73, 0, 8256, 272696336, 68174084, 8256, 272696336, 68174084, 8256, 272696336, 68174084, 8256, 272696336, 68174084, 8256, 272696336, 68174084, 8256, 272696336, 68174084, 8256, 272696336, 68174084, 8256, 272696336, 68174084, 8256, 272696336, 68174084, 8256, 272696336, 68174084, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513, 8576, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756453190715918933_720_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756453190715918933_720_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d7194a26 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756453190715918933_720_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,116 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 58)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 62))) { + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((70 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 186 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 0, 1409286144, 1088, 0, 1409286144, 1088, 0, 1409286144, 1104, 0, 1409286144, 1104, 0, 1409286144, 1104, 0, 1409286144, 3776, 0, 1073741824, 3792, 0, 1073741824, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 
1364546897, 357832021, 4480, 1364546897, 357832021, 4480, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021, 4496, 1364546897, 357832021] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756453191930330298_721_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756453191930330298_721_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..31f70a5f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756453191930330298_721_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,171 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 56)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 38)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 46)) { + if ((WaveGetLaneIndex() >= 46)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((107 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 31)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); 
+ uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((124 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 525 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4352, 1090785345, 272696336, 4992, 0, 2454257664, 4992, 0, 2454257664, 4992, 0, 2454257664, 4992, 0, 2454257664, 4992, 0, 2454257664, 4992, 0, 2454257664, 5696, 1090785345, 272696336, 5696, 1090785345, 272696336, 5696, 1090785345, 272696336, 5696, 1090785345, 272696336, 5696, 1090785345, 272696336, 5696, 1090785345, 272696336, 5696, 1090785345, 272696336, 5696, 1090785345, 272696336, 5696, 1090785345, 272696336, 5696, 1090785345, 272696336, 5696, 1090785345, 272696336, 6864, 9362, 613566464, 6864, 9362, 613566464, 6864, 9362, 613566464, 6864, 9362, 613566464, 6864, 9362, 613566464, 6864, 9362, 613566464, 6864, 9362, 613566464, 6864, 9362, 613566464, 6864, 9362, 613566464, 6864, 9362, 613566464, 6864, 9362, 613566464, 6864, 9362, 613566464, 6880, 9362, 613566464, 6880, 9362, 613566464, 6880, 9362, 613566464, 
6880, 9362, 613566464, 6880, 9362, 613566464, 6880, 9362, 613566464, 6880, 9362, 613566464, 6880, 9362, 613566464, 6880, 9362, 613566464, 6880, 9362, 613566464, 6880, 9362, 613566464, 6880, 9362, 613566464, 6896, 9362, 613566464, 6896, 9362, 613566464, 6896, 9362, 613566464, 6896, 9362, 613566464, 6896, 9362, 613566464, 6896, 9362, 613566464, 6896, 9362, 613566464, 6896, 9362, 613566464, 6896, 9362, 613566464, 6896, 9362, 613566464, 6896, 9362, 613566464, 6896, 9362, 613566464, 7952, 306783378, 0, 7952, 306783378, 0, 7952, 306783378, 0, 7952, 306783378, 0, 7952, 306783378, 0, 7952, 306783378, 0, 7952, 306783378, 0, 7952, 306783378, 0, 7952, 306783378, 0, 7952, 306783378, 0, 7956, 306783378, 0, 7956, 306783378, 0, 7956, 306783378, 0, 7956, 306783378, 0, 7956, 306783378, 0, 7956, 306783378, 0, 7956, 306783378, 0, 7956, 306783378, 0, 7956, 306783378, 0, 7956, 306783378, 0, 7960, 306783378, 0, 7960, 306783378, 0, 7960, 306783378, 0, 7960, 306783378, 0, 7960, 306783378, 0, 7960, 306783378, 0, 7960, 306783378, 0, 7960, 306783378, 0, 7960, 306783378, 0, 7960, 306783378, 0, 7968, 306783378, 0, 7968, 306783378, 0, 7968, 306783378, 0, 7968, 306783378, 0, 7968, 306783378, 0, 7968, 306783378, 0, 7968, 306783378, 0, 7968, 306783378, 0, 7968, 306783378, 0, 7968, 306783378, 0, 7972, 306783378, 0, 7972, 306783378, 0, 7972, 306783378, 0, 7972, 306783378, 0, 7972, 306783378, 0, 7972, 306783378, 0, 7972, 306783378, 0, 7972, 306783378, 0, 7972, 306783378, 0, 7972, 306783378, 0, 7976, 306783378, 0, 7976, 306783378, 0, 7976, 306783378, 0, 7976, 306783378, 0, 7976, 306783378, 0, 7976, 306783378, 0, 7976, 306783378, 0, 7976, 306783378, 0, 7976, 306783378, 0, 7976, 306783378, 0, 7984, 306783378, 0, 7984, 306783378, 0, 7984, 306783378, 0, 7984, 306783378, 0, 7984, 306783378, 0, 7984, 306783378, 0, 7984, 306783378, 0, 7984, 306783378, 0, 7984, 306783378, 0, 7984, 306783378, 0, 7988, 306783378, 0, 7988, 306783378, 0, 7988, 306783378, 0, 7988, 306783378, 0, 7988, 306783378, 0, 7988, 306783378, 
0, 7988, 306783378, 0, 7988, 306783378, 0, 7988, 306783378, 0, 7988, 306783378, 0, 7992, 306783378, 0, 7992, 306783378, 0, 7992, 306783378, 0, 7992, 306783378, 0, 7992, 306783378, 0, 7992, 306783378, 0, 7992, 306783378, 0, 7992, 306783378, 0, 7992, 306783378, 0, 7992, 306783378, 0, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513, 8256, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756453197151818565_722_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756453197151818565_722_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..aef6d456 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756453197151818565_722_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,325 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() >= 33)) { + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 50)) { + result = (result + 
WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((147 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((184 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 62))) { + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((217 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((239 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 1))) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (303 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 309 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2944, 0, 286261248, 2944, 0, 286261248, 2944, 0, 286261248, 3776, 273, 0, 3776, 273, 0, 3776, 273, 0, 5632, 1, 0, 7936, 4194304, 64, 7936, 4194304, 64, 7952, 4194304, 64, 7952, 4194304, 64, 7968, 4194304, 64, 7968, 4194304, 64, 11776, 34952, 2147483648, 11776, 34952, 2147483648, 11776, 34952, 2147483648, 11776, 34952, 
2147483648, 11776, 34952, 2147483648, 11792, 34952, 2147483648, 11792, 34952, 2147483648, 11792, 34952, 2147483648, 11792, 34952, 2147483648, 11792, 34952, 2147483648, 14592, 2290649224, 2290649224, 14592, 2290649224, 2290649224, 14592, 2290649224, 2290649224, 14592, 2290649224, 2290649224, 14592, 2290649224, 2290649224, 14592, 2290649224, 2290649224, 14592, 2290649224, 2290649224, 14592, 2290649224, 2290649224, 14592, 2290649224, 2290649224, 14592, 2290649224, 2290649224, 14592, 2290649224, 2290649224, 14592, 2290649224, 2290649224, 14592, 2290649224, 2290649224, 14592, 2290649224, 2290649224, 14592, 2290649224, 2290649224, 14592, 2290649224, 2290649224, 14608, 2290649224, 2290649224, 14608, 2290649224, 2290649224, 14608, 2290649224, 2290649224, 14608, 2290649224, 2290649224, 14608, 2290649224, 2290649224, 14608, 2290649224, 2290649224, 14608, 2290649224, 2290649224, 14608, 2290649224, 2290649224, 14608, 2290649224, 2290649224, 14608, 2290649224, 2290649224, 14608, 2290649224, 2290649224, 14608, 2290649224, 2290649224, 14608, 2290649224, 2290649224, 14608, 2290649224, 2290649224, 14608, 2290649224, 2290649224, 14608, 2290649224, 2290649224, 15296, 2184, 2290614272, 15296, 2184, 2290614272, 15296, 2184, 2290614272, 15296, 2184, 2290614272, 15296, 2184, 2290614272, 15296, 2184, 2290614272, 15296, 2184, 2290614272, 15312, 2184, 2290614272, 15312, 2184, 2290614272, 15312, 2184, 2290614272, 15312, 2184, 2290614272, 15312, 2184, 2290614272, 15312, 2184, 2290614272, 15312, 2184, 2290614272, 15936, 73, 0, 15936, 73, 0, 15936, 73, 0, 17152, 272696336, 68174084, 17152, 272696336, 68174084, 17152, 272696336, 68174084, 17152, 272696336, 68174084, 17152, 272696336, 68174084, 17152, 272696336, 68174084, 17152, 272696336, 68174084, 17152, 272696336, 68174084, 17152, 272696336, 68174084, 17152, 272696336, 68174084, 19392, 613566756, 1227133513, 19392, 613566756, 1227133513, 19392, 613566756, 1227133513, 19392, 613566756, 1227133513, 19392, 613566756, 1227133513, 19392, 613566756, 
1227133513, 19392, 613566756, 1227133513, 19392, 613566756, 1227133513, 19392, 613566756, 1227133513, 19392, 613566756, 1227133513, 19392, 613566756, 1227133513, 19392, 613566756, 1227133513, 19392, 613566756, 1227133513, 19392, 613566756, 1227133513, 19392, 613566756, 1227133513, 19392, 613566756, 1227133513, 19392, 613566756, 1227133513, 19392, 613566756, 1227133513, 19392, 613566756, 1227133513, 19392, 613566756, 1227133513, 19392, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756453289673822223_724_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756453289673822223_724_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1fb12de6 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756453289673822223_724_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,242 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((17 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 28)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((32 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((39 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 57)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 42))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 58))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (150 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + 
switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 345 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2048, 89478485, 0, 2048, 89478485, 0, 2048, 89478485, 0, 2048, 89478485, 0, 2048, 89478485, 0, 2048, 89478485, 0, 2048, 89478485, 0, 2048, 89478485, 0, 2048, 89478485, 0, 2048, 89478485, 0, 2048, 89478485, 0, 2048, 89478485, 0, 2048, 89478485, 0, 2048, 89478485, 0, 2052, 89478485, 0, 2052, 89478485, 0, 2052, 89478485, 0, 2052, 89478485, 0, 2052, 89478485, 0, 
2052, 89478485, 0, 2052, 89478485, 0, 2052, 89478485, 0, 2052, 89478485, 0, 2052, 89478485, 0, 2052, 89478485, 0, 2052, 89478485, 0, 2052, 89478485, 0, 2052, 89478485, 0, 2064, 89478485, 0, 2064, 89478485, 0, 2064, 89478485, 0, 2064, 89478485, 0, 2064, 89478485, 0, 2064, 89478485, 0, 2064, 89478485, 0, 2064, 89478485, 0, 2064, 89478485, 0, 2064, 89478485, 0, 2064, 89478485, 0, 2064, 89478485, 0, 2064, 89478485, 0, 2064, 89478485, 0, 2068, 89478485, 0, 2068, 89478485, 0, 2068, 89478485, 0, 2068, 89478485, 0, 2068, 89478485, 0, 2068, 89478485, 0, 2068, 89478485, 0, 2068, 89478485, 0, 2068, 89478485, 0, 2068, 89478485, 0, 2068, 89478485, 0, 2068, 89478485, 0, 2068, 89478485, 0, 2068, 89478485, 0, 2496, 85, 0, 2496, 85, 0, 2496, 85, 0, 2496, 85, 0, 2500, 85, 0, 2500, 85, 0, 2500, 85, 0, 2500, 85, 0, 2512, 85, 0, 2512, 85, 0, 2512, 85, 0, 2512, 85, 0, 2516, 85, 0, 2516, 85, 0, 2516, 85, 0, 2516, 85, 0, 4544, 17, 0, 4544, 17, 0, 6272, 4, 1145324544, 6272, 4, 1145324544, 6272, 4, 1145324544, 6272, 4, 1145324544, 6272, 4, 1145324544, 6272, 4, 1145324544, 6272, 4, 1145324544, 8448, 4, 0, 9152, 4, 1145307136, 9152, 4, 1145307136, 9152, 4, 1145307136, 9152, 4, 1145307136, 9152, 4, 1145307136, 9600, 559240, 0, 9600, 559240, 0, 9600, 559240, 0, 9600, 559240, 0, 9600, 559240, 0, 10240, 17, 0, 10240, 17, 0, 11136, 1145324612, 1145324612, 11136, 1145324612, 1145324612, 11136, 1145324612, 1145324612, 11136, 1145324612, 1145324612, 11136, 1145324612, 1145324612, 11136, 1145324612, 1145324612, 11136, 1145324612, 1145324612, 11136, 1145324612, 1145324612, 11136, 1145324612, 1145324612, 11136, 1145324612, 1145324612, 11136, 1145324612, 1145324612, 11136, 1145324612, 1145324612, 11136, 1145324612, 1145324612, 11136, 1145324612, 1145324612, 11136, 1145324612, 1145324612, 11136, 1145324612, 1145324612, 11584, 559240, 0, 11584, 559240, 0, 11584, 559240, 0, 11584, 559240, 0, 11584, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756453383992433043_726_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756453383992433043_726_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..51b7639e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756453383992433043_726_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,404 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 
1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 60)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 50))) { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 59))) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 43))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((172 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
(((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 56)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 18)) { + if ((WaveGetLaneIndex() == 61)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (263 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 62)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 33)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (296 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 4))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (334 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (355 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (365 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 54))) { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (383 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (388 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (393 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (397 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 324 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1792, 18, 613566464, 1792, 18, 613566464, 1792, 18, 613566464, 1792, 18, 613566464, 1792, 18, 613566464, 1792, 18, 613566464, 1792, 18, 613566464, 1792, 18, 613566464, 1792, 18, 613566464, 1808, 18, 613566464, 1808, 18, 613566464, 1808, 18, 613566464, 1808, 18, 613566464, 1808, 18, 613566464, 1808, 18, 613566464, 1808, 18, 613566464, 1808, 18, 613566464, 1808, 18, 613566464, 1824, 18, 613566464, 1824, 18, 613566464, 1824, 18, 613566464, 1824, 18, 613566464, 1824, 18, 613566464, 1824, 18, 613566464, 1824, 18, 613566464, 1824, 18, 613566464, 1824, 18, 613566464, 2688, 272696336, 68174084, 2688, 272696336, 68174084, 2688, 272696336, 68174084, 2688, 272696336, 68174084, 2688, 272696336, 68174084, 2688, 272696336, 68174084, 2688, 272696336, 68174084, 2688, 272696336, 68174084, 2688, 272696336, 68174084, 2688, 272696336, 68174084, 2704, 272696336, 68174084, 2704, 272696336, 68174084, 2704, 272696336, 68174084, 2704, 272696336, 68174084, 2704, 272696336, 68174084, 2704, 272696336, 68174084, 2704, 272696336, 68174084, 2704, 272696336, 68174084, 2704, 272696336, 68174084, 2704, 272696336, 68174084, 2720, 272696336, 68174084, 2720, 272696336, 68174084, 2720, 272696336, 68174084, 2720, 272696336, 68174084, 2720, 272696336, 68174084, 2720, 272696336, 68174084, 2720, 272696336, 68174084, 2720, 
272696336, 68174084, 2720, 272696336, 68174084, 2720, 272696336, 68174084, 4736, 2340, 1207959552, 4736, 2340, 1207959552, 4736, 2340, 1207959552, 4736, 2340, 1207959552, 4736, 2340, 1207959552, 4736, 2340, 1207959552, 11024, 73, 2147483648, 11024, 73, 2147483648, 11024, 73, 2147483648, 11024, 73, 2147483648, 11040, 73, 2147483648, 11040, 73, 2147483648, 11040, 73, 2147483648, 11040, 73, 2147483648, 11728, 1, 2453667840, 11728, 1, 2453667840, 11728, 1, 2453667840, 11728, 1, 2453667840, 11728, 1, 2453667840, 11744, 1, 2453667840, 11744, 1, 2453667840, 11744, 1, 2453667840, 11744, 1, 2453667840, 11744, 1, 2453667840, 18368, 16, 0, 24512, 128, 8388608, 24512, 128, 8388608, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513, 25152, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756453471698195379_729_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756453471698195379_729_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..03101f48 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756453471698195379_729_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,302 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 30))) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 61))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 
<< 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((111 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 38)) { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((184 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((191 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 49)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 24)) { + if ((WaveGetLaneIndex() >= 56)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 60)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 62)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (248 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5888, 73, 0, 5888, 73, 0, 5888, 73, 0, 7104, 33554432, 0, 7120, 33554432, 0, 7136, 33554432, 0, 8448, 16, 1048576, 8448, 16, 1048576, 8464, 16, 1048576, 8464, 16, 1048576, 8480, 16, 1048576, 8480, 16, 1048576, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 8768, 613566756, 1227133513, 12800, 0, 131072, 14080, 73, 0, 14080, 73, 0, 14080, 73, 0, 14656, 4260880, 0, 14656, 4260880, 0, 14656, 4260880, 0, 14656, 4260880, 0, 14976, 9586980, 0, 14976, 9586980, 0, 14976, 9586980, 0, 14976, 9586980, 0, 14976, 9586980, 0, 14976, 9586980, 0, 14976, 9586980, 0, 14976, 9586980, 0, 15872, 0, 1073741824] + - 
Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756453479080900060_730_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756453479080900060_730_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..23c1ce77 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756453479080900060_730_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,609 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 17)) { + if ((WaveGetLaneIndex() < 27)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 35)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 
1: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 34)) { + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 53)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter0 << 4)); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((152 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((162 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((171 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((184 << 6) 
| (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((231 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 21)) { + if ((WaveGetLaneIndex() == 33)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveSum(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((260 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 49))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 17)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (326 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (331 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; 
+ } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 56))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (352 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 49))) { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (376 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (387 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (400 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 62)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (407 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { 
+ if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (416 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (426 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 62))) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (444 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (455 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (460 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 42))) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((478 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 39)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((488 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((503 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((510 << 6) | (counter4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i5 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 52)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((520 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((531 << 6) | (counter4 << 4)); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (535 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 459 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 69905, 0, 768, 69905, 0, 768, 69905, 0, 768, 69905, 0, 768, 69905, 0, 5248, 1145324612, 1145324612, 5248, 1145324612, 1145324612, 5248, 1145324612, 1145324612, 5248, 1145324612, 1145324612, 5248, 1145324612, 1145324612, 5248, 1145324612, 1145324612, 5248, 1145324612, 1145324612, 5248, 1145324612, 1145324612, 5248, 1145324612, 1145324612, 5248, 1145324612, 1145324612, 5248, 1145324612, 1145324612, 5248, 1145324612, 1145324612, 5248, 1145324612, 1145324612, 5248, 1145324612, 1145324612, 5248, 1145324612, 1145324612, 5248, 1145324612, 1145324612, 5696, 559240, 0, 5696, 559240, 0, 5696, 559240, 0, 5696, 559240, 0, 5696, 559240, 0, 6592, 17, 0, 6592, 17, 0, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 9744, 8, 2147483648, 9744, 8, 2147483648, 9748, 8, 2147483648, 9748, 8, 2147483648, 9752, 8, 2147483648, 9752, 8, 
2147483648, 9760, 8, 2147483648, 9760, 8, 2147483648, 9764, 8, 2147483648, 9764, 8, 2147483648, 9768, 8, 2147483648, 9768, 8, 2147483648, 9776, 8, 2147483648, 9776, 8, 2147483648, 9780, 8, 2147483648, 9780, 8, 2147483648, 9784, 8, 2147483648, 9784, 8, 2147483648, 11792, 8, 2281701376, 11792, 8, 2281701376, 11792, 8, 2281701376, 11796, 8, 2281701376, 11796, 8, 2281701376, 11796, 8, 2281701376, 11800, 8, 2281701376, 11800, 8, 2281701376, 11800, 8, 2281701376, 11808, 8, 2281701376, 11808, 8, 2281701376, 11808, 8, 2281701376, 11812, 8, 2281701376, 11812, 8, 2281701376, 11812, 8, 2281701376, 11816, 8, 2281701376, 11816, 8, 2281701376, 11816, 8, 2281701376, 11824, 8, 2281701376, 11824, 8, 2281701376, 11824, 8, 2281701376, 11828, 8, 2281701376, 11828, 8, 2281701376, 11828, 8, 2281701376, 11832, 8, 2281701376, 11832, 8, 2281701376, 11832, 8, 2281701376, 12864, 1, 0, 21184, 1048832, 16781313, 21184, 1048832, 16781313, 21184, 1048832, 16781313, 21184, 1048832, 16781313, 21184, 1048832, 16781313, 22528, 0, 33554432, 25600, 512, 33554432, 25600, 512, 33554432, 29440, 1145324612, 1145324612, 29440, 1145324612, 1145324612, 29440, 1145324612, 1145324612, 29440, 1145324612, 1145324612, 29440, 1145324612, 1145324612, 29440, 1145324612, 1145324612, 29440, 1145324612, 1145324612, 29440, 1145324612, 1145324612, 29440, 1145324612, 1145324612, 29440, 1145324612, 1145324612, 29440, 1145324612, 1145324612, 29440, 1145324612, 1145324612, 29440, 1145324612, 1145324612, 29440, 1145324612, 1145324612, 29440, 1145324612, 1145324612, 29440, 1145324612, 1145324612, 30608, 2184, 2290649088, 30608, 2184, 2290649088, 30608, 2184, 2290649088, 30608, 2184, 2290649088, 30608, 2184, 2290649088, 30608, 2184, 2290649088, 30608, 2184, 2290649088, 30608, 2184, 2290649088, 30608, 2184, 2290649088, 30624, 2184, 2290649088, 30624, 2184, 2290649088, 30624, 2184, 2290649088, 30624, 2184, 2290649088, 30624, 2184, 2290649088, 30624, 2184, 2290649088, 30624, 2184, 2290649088, 30624, 2184, 2290649088, 30624, 2184, 
2290649088, 30640, 2184, 2290649088, 30640, 2184, 2290649088, 30640, 2184, 2290649088, 30640, 2184, 2290649088, 30640, 2184, 2290649088, 30640, 2184, 2290649088, 30640, 2184, 2290649088, 30640, 2184, 2290649088, 30640, 2184, 2290649088, 34000, 136, 2281701376, 34000, 136, 2281701376, 34000, 136, 2281701376, 34000, 136, 2281701376, 34016, 136, 2281701376, 34016, 136, 2281701376, 34016, 136, 2281701376, 34016, 136, 2281701376, 34032, 136, 2281701376, 34032, 136, 2281701376, 34032, 136, 2281701376, 34032, 136, 2281701376] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756453592639312766_731_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756453592639312766_731_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c77dd922 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756453592639312766_731_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,251 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 62)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 54))) { + if (((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 21))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 46))) { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 38))) { + if ((((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 61))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (203 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 3520, 613566756, 1227133513, 3520, 613566756, 1227133513, 3520, 613566756, 1227133513, 3520, 613566756, 1227133513, 3520, 613566756, 1227133513, 3520, 613566756, 1227133513, 3520, 613566756, 1227133513, 3520, 613566756, 1227133513, 3520, 613566756, 1227133513, 3520, 613566756, 1227133513, 3520, 613566756, 1227133513, 3520, 613566756, 1227133513, 3520, 613566756, 1227133513, 3520, 613566756, 1227133513, 3520, 613566756, 1227133513, 3520, 613566756, 1227133513, 3520, 613566756, 1227133513, 3520, 613566756, 1227133513, 3520, 
613566756, 1227133513, 3520, 613566756, 1227133513, 3520, 613566756, 1227133513, 7680, 32, 0, 7296, 15, 4292870144, 7296, 15, 4292870144, 7296, 15, 4292870144, 7296, 15, 4292870144, 7296, 15, 4292870144, 7296, 15, 4292870144, 7296, 15, 4292870144, 7296, 15, 4292870144, 7296, 15, 4292870144, 7296, 15, 4292870144, 7296, 15, 4292870144, 7296, 15, 4292870144, 7296, 15, 4292870144, 7296, 15, 4292870144, 7296, 15, 4292870144, 6912, 2097152, 1, 6912, 2097152, 1, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050, 6528, 2861214336, 699050] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756453607712439604_734_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756453607712439604_734_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..96731198 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756453607712439604_734_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 33))) { + if ((((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((63 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 57))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() 
== 40)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 56))) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4048, 0, 16] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756453607889092279_735_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756453607889092279_735_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8fd187ef --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756453607889092279_735_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,239 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 60))) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 37))) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 34))) { + if (((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 56))) { + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((211 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 18) || 
(WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 49))) { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2752, 17, 0, 2752, 17, 0, 8128, 559240, 0, 8128, 559240, 0, 8128, 559240, 0, 8128, 559240, 0, 8128, 559240, 0, 13504, 0, 2147483648, 13520, 0, 2147483648, 15360, 130, 545390592, 15360, 130, 545390592, 15360, 130, 545390592, 15360, 130, 545390592, 15360, 130, 545390592, 15680, 545392672, 136348168, 15680, 545392672, 136348168, 15680, 545392672, 136348168, 15680, 545392672, 136348168, 15680, 545392672, 136348168, 15680, 545392672, 136348168, 15680, 545392672, 136348168, 15680, 545392672, 136348168, 15680, 545392672, 136348168, 15680, 545392672, 136348168] + - Name: _wave_op_index + Format: UInt32 + 
Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756453609087710147_736_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756453609087710147_736_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ab8e000b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756453609087710147_736_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,141 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 
22)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 37)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 39)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 75 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4288, 4194560, 278528, 4288, 4194560, 278528, 4288, 4194560, 278528, 4288, 4194560, 278528, 4032, 0, 32, 3648, 524288, 134217733, 3648, 524288, 134217733, 3648, 524288, 134217733, 3648, 524288, 134217733, 5760, 2, 0, 5376, 0, 128, 5120, 2045, 0, 5120, 2045, 0, 5120, 2045, 0, 5120, 2045, 0, 5120, 2045, 0, 5120, 2045, 0, 5120, 2045, 0, 5120, 2045, 0, 5120, 2045, 0, 5120, 2045, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756453610151122604_737_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756453610151122604_737_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..11dff1d3 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756453610151122604_737_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,297 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 58)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 59))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 60))) { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 42))) { + if (((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (174 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = 
(result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() >= 62)) { + if ((WaveGetLaneIndex() >= 34)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (262 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (269 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 27)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (276 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1600, 134225921, 16384, 1600, 134225921, 16384, 1600, 134225921, 16384, 1600, 134225921, 16384, 1344, 0, 4227858432, 1344, 0, 4227858432, 1344, 0, 4227858432, 1344, 0, 4227858432, 1344, 0, 4227858432, 1344, 0, 4227858432, 5696, 268501008, 1048832, 5696, 268501008, 1048832, 5696, 268501008, 1048832, 5696, 268501008, 1048832, 5696, 268501008, 1048832, 6848, 256, 0, 12672, 0, 1024, 13760, 8, 0, 17664, 8390656, 0, 17664, 8390656, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756453818377054187_739_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756453818377054187_739_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..04402fbc --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756453818377054187_739_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,148 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 48)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((78 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 61)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 192 + - Name: expected_bit_patterns + Format: 
UInt32 + Stride: 4 + Data: [1472, 0, 4294901760, 1472, 0, 4294901760, 1472, 0, 4294901760, 1472, 0, 4294901760, 1472, 0, 4294901760, 1472, 0, 4294901760, 1472, 0, 4294901760, 1472, 0, 4294901760, 1472, 0, 4294901760, 1472, 0, 4294901760, 1472, 0, 4294901760, 1472, 0, 4294901760, 1472, 0, 4294901760, 1472, 0, 4294901760, 1472, 0, 4294901760, 1472, 0, 4294901760, 1216, 32, 512, 1216, 32, 512, 2112, 73, 0, 2112, 73, 0, 2112, 73, 0, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 2688, 1363481681, 340870420, 3600, 3, 0, 3600, 3, 0, 3616, 3, 0, 3616, 3, 0, 5012, 268435712, 16, 5012, 268435712, 16, 5012, 268435712, 16, 5016, 268435712, 16, 5016, 268435712, 16, 5016, 268435712, 16, 5028, 268435712, 16, 5028, 268435712, 16, 5028, 268435712, 16, 5032, 268435712, 16, 5032, 268435712, 16, 5032, 268435712, 16, 5456, 0, 3758096384, 5456, 0, 3758096384, 5456, 0, 3758096384, 5472, 0, 3758096384, 5472, 0, 3758096384, 5472, 0, 3758096384] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756454068142024267_745_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756454068142024267_745_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..95028ca1 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756454068142024267_745_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,146 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 35)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 62))) { + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((66 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() 
== 45))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((88 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 55))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((106 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((117 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((124 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((WaveGetLaneIndex() >= 48)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((131 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 486 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7492, 4, 1140850688, 7492, 4, 1140850688, 7492, 4, 1140850688, 7496, 4, 1140850688, 7496, 4, 1140850688, 7496, 4, 1140850688, 7500, 4, 1140850688, 7500, 4, 1140850688, 7500, 4, 1140850688, 7508, 4, 1140850688, 7508, 4, 1140850688, 7508, 4, 1140850688, 7512, 4, 1140850688, 7512, 4, 1140850688, 7512, 4, 1140850688, 7516, 4, 1140850688, 7516, 4, 1140850688, 7516, 4, 1140850688, 7940, 559240, 0, 7940, 559240, 0, 7940, 559240, 0, 7940, 559240, 0, 7940, 559240, 0, 7944, 559240, 0, 7944, 559240, 0, 7944, 559240, 0, 7944, 559240, 0, 7944, 559240, 0, 7948, 559240, 0, 7948, 559240, 0, 7948, 559240, 0, 7948, 559240, 0, 7948, 559240, 0, 7956, 559240, 0, 7956, 559240, 0, 7956, 559240, 0, 7956, 559240, 0, 7956, 559240, 0, 7960, 559240, 0, 7960, 559240, 0, 7960, 559240, 0, 7960, 559240, 0, 7960, 559240, 0, 7964, 559240, 0, 7964, 559240, 0, 7964, 559240, 0, 7964, 559240, 0, 7964, 559240, 0, 8384, 0, 4294901760, 8384, 0, 4294901760, 8384, 0, 4294901760, 8384, 0, 4294901760, 8384, 0, 4294901760, 8384, 0, 4294901760, 8384, 0, 4294901760, 8384, 0, 4294901760, 8384, 0, 4294901760, 8384, 0, 4294901760, 8384, 0, 4294901760, 8384, 0, 4294901760, 8384, 0, 4294901760, 8384, 0, 4294901760, 8384, 0, 4294901760, 8384, 0, 4294901760, 8400, 0, 4294901760, 8400, 0, 4294901760, 8400, 0, 4294901760, 8400, 0, 4294901760, 8400, 0, 4294901760, 8400, 0, 4294901760, 8400, 0, 4294901760, 8400, 0, 4294901760, 8400, 0, 4294901760, 8400, 0, 4294901760, 8400, 0, 4294901760, 8400, 0, 4294901760, 8400, 0, 4294901760, 8400, 0, 4294901760, 8400, 0, 4294901760, 8400, 0, 4294901760, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 
896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 896, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 912, 0, 4294967288, 6788, 68, 1140850688, 6788, 68, 1140850688, 6788, 68, 1140850688, 6788, 68, 1140850688, 6792, 68, 1140850688, 6792, 68, 1140850688, 6792, 68, 1140850688, 6792, 68, 1140850688, 6796, 68, 1140850688, 6796, 68, 1140850688, 6796, 68, 1140850688, 6796, 68, 1140850688, 6804, 68, 1140850688, 6804, 68, 1140850688, 6804, 68, 1140850688, 6804, 68, 1140850688, 6808, 68, 1140850688, 6808, 68, 1140850688, 6808, 68, 1140850688, 6808, 68, 1140850688, 6812, 68, 1140850688, 6812, 68, 1140850688, 6812, 68, 1140850688, 6812, 68, 1140850688] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 
0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756454126713162933_746_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756454126713162933_746_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..81cde95b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756454126713162933_746_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,203 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 57))) { + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 62))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 46)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (i1 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((144 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 9984, 272696336, 68174084, 9984, 272696336, 68174084, 9984, 272696336, 68174084, 9984, 272696336, 68174084, 9984, 272696336, 68174084, 9984, 272696336, 68174084, 9984, 272696336, 68174084, 9984, 272696336, 68174084, 9984, 272696336, 68174084, 9984, 272696336, 68174084, 11712, 
67125248, 0, 11712, 67125248, 0, 11728, 67125248, 0, 11728, 67125248, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756454134183654291_748_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756454134183654291_748_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fbc7e7fe --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756454134183654291_748_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,145 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 20)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((26 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 34)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 23)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 62)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 59)) { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 
threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 162 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1680, 1048576, 0, 1684, 1048576, 0, 1688, 1048576, 0, 1696, 1048576, 0, 1700, 1048576, 0, 1704, 1048576, 0, 1712, 1048576, 0, 1716, 1048576, 0, 1720, 1048576, 0, 2128, 0, 1431655764, 2128, 0, 1431655764, 2128, 0, 1431655764, 2128, 0, 1431655764, 2128, 0, 1431655764, 2128, 0, 1431655764, 2128, 0, 1431655764, 2128, 0, 1431655764, 2128, 0, 1431655764, 2128, 0, 1431655764, 2128, 0, 1431655764, 2128, 0, 1431655764, 2128, 0, 1431655764, 2128, 0, 1431655764, 2128, 0, 1431655764, 2144, 0, 1431655764, 2144, 0, 1431655764, 2144, 0, 1431655764, 2144, 0, 1431655764, 2144, 0, 1431655764, 2144, 0, 1431655764, 2144, 0, 1431655764, 2144, 0, 1431655764, 2144, 0, 1431655764, 2144, 0, 1431655764, 2144, 0, 1431655764, 2144, 0, 1431655764, 2144, 0, 1431655764, 2144, 0, 1431655764, 2144, 0, 1431655764, 2160, 0, 1431655764, 2160, 0, 1431655764, 2160, 0, 1431655764, 2160, 0, 1431655764, 2160, 0, 1431655764, 2160, 0, 1431655764, 2160, 0, 1431655764, 2160, 0, 1431655764, 2160, 0, 1431655764, 2160, 0, 1431655764, 2160, 0, 1431655764, 2160, 0, 1431655764, 2160, 0, 1431655764, 2160, 0, 1431655764, 2160, 0, 1431655764] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756454152068637200_749_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756454152068637200_749_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d4b15e83 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756454152068637200_749_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,186 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 9)) { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if ((WaveGetLaneIndex() >= 41)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 61)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if ((WaveGetLaneIndex() >= 59)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 56))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 32)) { + if ((WaveGetLaneIndex() >= 45)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 93 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 511, 0, 576, 511, 0, 576, 511, 0, 576, 511, 0, 576, 511, 0, 576, 511, 0, 576, 511, 0, 576, 511, 0, 576, 511, 0, 2432, 0, 4160749568, 2432, 0, 4160749568, 2432, 0, 4160749568, 2432, 0, 4160749568, 2432, 0, 4160749568, 3776, 0, 1426063360, 3776, 0, 1426063360, 3776, 0, 1426063360, 3776, 0, 1426063360, 4480, 0, 1426063360, 4480, 0, 1426063360, 4480, 0, 1426063360, 4480, 0, 1426063360, 5952, 4194304, 1024, 5952, 4194304, 1024, 7552, 0, 64, 8832, 32256, 0, 8832, 32256, 0, 8832, 32256, 0, 8832, 32256, 0, 8832, 32256, 0, 8832, 32256, 0] + - Name: 
_wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756454153389449061_750_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756454153389449061_750_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e0a24349 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756454153389449061_750_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,172 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 45)) { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 39))) { + if (((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 46)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((80 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((126 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 59))) { + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((158 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((counter2 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 126 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 
6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 6928, 262143, 4294934528, 8084, 2, 32, 8084, 2, 32, 8088, 2, 32, 8088, 2, 32, 11264, 8192, 262160, 11264, 8192, 262160, 11264, 8192, 262160] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756454157342749674_751_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756454157342749674_751_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e02da7d3 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756454157342749674_751_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,223 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 46)) || 
(WaveGetLaneIndex() == 34))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((88 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 32)) { + if ((WaveGetLaneIndex() >= 53)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (i1 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 25)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 2)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 73, 0, 768, 73, 0, 768, 73, 0, 8768, 272696336, 68174084, 8768, 272696336, 68174084, 8768, 272696336, 68174084, 8768, 272696336, 68174084, 8768, 272696336, 68174084, 8768, 272696336, 68174084, 8768, 272696336, 68174084, 8768, 272696336, 
68174084, 8768, 272696336, 68174084, 8768, 272696336, 68174084, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513, 9408, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756454170669018221_753_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756454170669018221_753_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..052f8026 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756454170669018221_753_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,343 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 55))) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i0 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 47))) { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if 
((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((118 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((125 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 44))) { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((176 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + 
continue; + } + } + if ((WaveGetLaneIndex() == 60)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 8)) { + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 26)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
((252 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 2)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 40)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((288 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((295 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 50)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- 
pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 219 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7568, 8192, 0, 7584, 8192, 0, 7600, 8192, 0, 8016, 65536, 0, 8032, 65536, 0, 8048, 65536, 0, 9664, 524288, 16384, 9664, 524288, 16384, 9680, 524288, 16384, 9680, 524288, 16384, 9696, 524288, 16384, 9696, 524288, 16384, 11264, 0, 2048, 11280, 0, 2048, 11296, 0, 2048, 11840, 2181562368, 2080, 11840, 2181562368, 2080, 11840, 2181562368, 2080, 11840, 2181562368, 2080, 11840, 2181562368, 2080, 11856, 2181562368, 2080, 11856, 2181562368, 2080, 11856, 2181562368, 2080, 11856, 2181562368, 2080, 11856, 2181562368, 2080, 11872, 2181562368, 2080, 11872, 2181562368, 2080, 11872, 2181562368, 2080, 11872, 2181562368, 2080, 11872, 2181562368, 2080, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 12800, 613566756, 1227133513, 14208, 19173961, 0, 14208, 19173961, 0, 14208, 19173961, 0, 14208, 19173961, 0, 14208, 19173961, 0, 14208, 19173961, 0, 14208, 19173961, 0, 14208, 19173961, 0, 14208, 19173961, 0, 15424, 0, 4194304, 15440, 0, 4194304, 15456, 0, 4194304, 16896, 272696336, 68174084, 16896, 272696336, 68174084, 16896, 272696336, 68174084, 16896, 272696336, 68174084, 16896, 272696336, 68174084, 16896, 272696336, 68174084, 16896, 272696336, 68174084, 16896, 
272696336, 68174084, 16896, 272696336, 68174084, 16896, 272696336, 68174084] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756454207455054023_754_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756454207455054023_754_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..68a052a2 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756454207455054023_754_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,346 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 40)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 56))) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } else { + if ((WaveGetLaneIndex() == 32)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 26))) { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((175 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if 
((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 55))) { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 35)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + } + case 1: { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 51))) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if 
(((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((266 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((273 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + } else { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (299 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((323 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((332 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 1)) { + break; + } + 
} + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 40))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (350 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (354 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 822 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4496, 1, 268435456, 4496, 1, 268435456, 4512, 1, 268435456, 4512, 1, 268435456, 5200, 266305, 268435456, 5200, 266305, 268435456, 5200, 266305, 268435456, 5200, 266305, 268435456, 5200, 266305, 268435456, 5216, 266305, 268435456, 5216, 266305, 268435456, 5216, 266305, 268435456, 5216, 266305, 268435456, 5216, 266305, 268435456, 13760, 16644, 1073741824, 13760, 16644, 1073741824, 13760, 16644, 1073741824, 13760, 16644, 1073741824, 14464, 16644, 1073741824, 14464, 16644, 1073741824, 14464, 16644, 1073741824, 14464, 16644, 1073741824, 19136, 67108880, 1075838980, 19136, 67108880, 1075838980, 19136, 67108880, 1075838980, 19136, 67108880, 1075838980, 19136, 67108880, 1075838980, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 
2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20676, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20680, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 
20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20692, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 20696, 2863311530, 2862787242, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 
1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21252, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21256, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 
1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21268, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 21272, 1427457365, 1431655765, 22400, 16384, 16777472, 22400, 16384, 16777472, 22400, 16384, 16777472] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756463766249428512_756_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756463766249428512_756_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f3986dc9 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756463766249428512_756_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,96 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 52))) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 60))) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 59)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) 
| (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756463868164056300_758_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756463868164056300_758_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1243de96 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756463868164056300_758_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,123 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 53))) { + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 63 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3392, 1145324612, 1145324612, 3392, 1145324612, 1145324612, 3392, 1145324612, 1145324612, 3392, 1145324612, 1145324612, 3392, 1145324612, 1145324612, 3392, 1145324612, 1145324612, 3392, 1145324612, 1145324612, 3392, 1145324612, 1145324612, 3392, 1145324612, 1145324612, 3392, 1145324612, 1145324612, 3392, 1145324612, 1145324612, 3392, 1145324612, 1145324612, 3392, 1145324612, 1145324612, 3392, 1145324612, 1145324612, 3392, 1145324612, 1145324612, 3392, 1145324612, 1145324612, 3840, 559240, 0, 3840, 559240, 0, 3840, 559240, 0, 3840, 559240, 0, 3840, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756463868751989832_759_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756463868751989832_759_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..700ccae4 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756463868751989832_759_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,270 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((34 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 6))) { + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 14)) { + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { 
+ if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() < 19)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((244 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 51)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((251 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 1)) { + continue; + } + if ((i4 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 
1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 330 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4112, 682, 2684354560, 4112, 682, 2684354560, 4112, 682, 2684354560, 4112, 682, 2684354560, 4112, 682, 2684354560, 4112, 682, 2684354560, 4112, 682, 2684354560, 8464, 43690, 2147483648, 8464, 43690, 2147483648, 8464, 43690, 2147483648, 8464, 43690, 2147483648, 8464, 43690, 2147483648, 8464, 43690, 2147483648, 8464, 43690, 2147483648, 8464, 43690, 2147483648, 8464, 43690, 2147483648, 11072, 0, 65536, 11088, 0, 65536, 11104, 0, 65536, 13504, 1145324612, 1145324612, 13504, 1145324612, 1145324612, 13504, 1145324612, 1145324612, 13504, 1145324612, 1145324612, 13504, 1145324612, 1145324612, 13504, 1145324612, 1145324612, 13504, 1145324612, 1145324612, 13504, 1145324612, 1145324612, 13504, 1145324612, 1145324612, 13504, 1145324612, 1145324612, 13504, 1145324612, 1145324612, 13504, 1145324612, 1145324612, 13504, 1145324612, 1145324612, 13504, 1145324612, 1145324612, 13504, 1145324612, 1145324612, 13504, 1145324612, 1145324612, 14720, 2184, 2290649088, 14720, 2184, 2290649088, 14720, 2184, 2290649088, 14720, 2184, 2290649088, 14720, 2184, 2290649088, 14720, 2184, 2290649088, 14720, 2184, 2290649088, 14720, 2184, 2290649088, 14720, 2184, 2290649088, 14736, 2184, 2290649088, 14736, 2184, 2290649088, 14736, 2184, 2290649088, 14736, 2184, 2290649088, 14736, 2184, 2290649088, 14736, 2184, 2290649088, 14736, 2184, 2290649088, 14736, 2184, 2290649088, 14736, 2184, 2290649088, 14752, 2184, 2290649088, 14752, 2184, 2290649088, 14752, 2184, 2290649088, 14752, 2184, 2290649088, 14752, 2184, 2290649088, 14752, 2184, 2290649088, 14752, 2184, 2290649088, 14752, 2184, 2290649088, 14752, 2184, 2290649088, 15620, 34952, 0, 15620, 34952, 0, 15620, 34952, 0, 15620, 34952, 0, 15624, 34952, 0, 15624, 34952, 0, 15624, 34952, 0, 15624, 34952, 0, 15636, 34952, 0, 15636, 34952, 0, 15636, 34952, 0, 15636, 
34952, 0, 15640, 34952, 0, 15640, 34952, 0, 15640, 34952, 0, 15640, 34952, 0, 15652, 34952, 0, 15652, 34952, 0, 15652, 34952, 0, 15652, 34952, 0, 15656, 34952, 0, 15656, 34952, 0, 15656, 34952, 0, 15656, 34952, 0, 16068, 0, 2290614272, 16068, 0, 2290614272, 16068, 0, 2290614272, 16068, 0, 2290614272, 16072, 0, 2290614272, 16072, 0, 2290614272, 16072, 0, 2290614272, 16072, 0, 2290614272, 16084, 0, 2290614272, 16084, 0, 2290614272, 16084, 0, 2290614272, 16084, 0, 2290614272, 16088, 0, 2290614272, 16088, 0, 2290614272, 16088, 0, 2290614272, 16088, 0, 2290614272, 16100, 0, 2290614272, 16100, 0, 2290614272, 16100, 0, 2290614272, 16100, 0, 2290614272, 16104, 0, 2290614272, 16104, 0, 2290614272, 16104, 0, 2290614272, 16104, 0, 2290614272] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756463927497607984_760_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756463927497607984_760_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f6e02daa --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756463927497607984_760_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,277 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 26))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 
<< 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 59))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 52)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((97 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((162 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 51))) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 34)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (219 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 47))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (237 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 51))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (255 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (266 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 180 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 4176, 0, 32768, 4192, 0, 32768, 4208, 0, 32768, 6928, 512, 0, 6944, 512, 0, 6960, 512, 0, 9024, 85, 0, 9024, 85, 0, 9024, 85, 0, 9024, 85, 0, 10384, 8, 2181038080, 10384, 8, 2181038080, 10384, 8, 2181038080, 10400, 8, 2181038080, 10400, 8, 2181038080, 10400, 8, 2181038080, 10416, 8, 2181038080, 10416, 8, 2181038080, 10416, 8, 2181038080, 12224, 0, 134217728, 12800, 0, 33288, 12800, 0, 33288, 12800, 0, 33288, 13440, 73, 0, 13440, 73, 0, 13440, 73, 0, 14016, 272696336, 68174084, 14016, 272696336, 68174084, 14016, 272696336, 68174084, 14016, 272696336, 68174084, 14016, 272696336, 68174084, 14016, 272696336, 68174084, 14016, 272696336, 68174084, 14016, 272696336, 68174084, 14016, 272696336, 68174084, 14016, 272696336, 68174084, 15168, 36, 1224736768, 15168, 36, 1224736768, 15168, 36, 1224736768, 15168, 36, 1224736768, 15168, 36, 1224736768, 16320, 36, 1226833920, 16320, 36, 1226833920, 16320, 36, 1226833920, 16320, 36, 1226833920, 16320, 36, 1226833920, 16320, 36, 1226833920, 17024, 18724, 1226833920, 17024, 18724, 1226833920, 17024, 18724, 1226833920, 17024, 18724, 1226833920, 17024, 18724, 1226833920, 17024, 18724, 1226833920, 17024, 18724, 1226833920, 17024, 18724, 1226833920, 17024, 18724, 1226833920] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756463996551220782_762_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756463996551220782_762_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7f4e7914 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756463996551220782_762_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,207 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 63)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
(((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 59)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 33)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 
44))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((114 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((123 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 387 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 
68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5696, 0, 4294967294, 5312, 536870912, 0, 5056, 1048575, 0, 5056, 1048575, 0, 5056, 1048575, 0, 5056, 1048575, 0, 5056, 1048575, 0, 5056, 1048575, 0, 5056, 1048575, 0, 5056, 1048575, 0, 5056, 1048575, 0, 5056, 1048575, 0, 5056, 1048575, 0, 5056, 1048575, 0, 5056, 1048575, 0, 5056, 1048575, 0, 5056, 1048575, 0, 5056, 1048575, 0, 5056, 1048575, 0, 5056, 1048575, 0, 5056, 1048575, 0, 5056, 1048575, 0, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 
1431655765, 1431655765, 7296, 1431655765, 1431655765, 7296, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765, 7312, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756463999062342390_763_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756463999062342390_763_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e464a5ce --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756463999062342390_763_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 54)) || 
(WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 42 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4608, 1073741824, 0, 4352, 2097280, 1073774592, 4352, 2097280, 1073774592, 4352, 2097280, 1073774592, 4352, 2097280, 1073774592, 4096, 1048580, 540672, 4096, 1048580, 540672, 4096, 1048580, 540672, 4096, 1048580, 540672, 3712, 2147483648, 4198400, 3712, 2147483648, 4198400, 3712, 2147483648, 4198400, 3456, 134217728, 262144, 3456, 134217728, 262144] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756463999827766285_764_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756463999827766285_764_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8769cc4a --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756463999827766285_764_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,145 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (13 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 48))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 
1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 29))) { + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml 
+--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 252 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 832, 1431655765, 1431655765, 1984, 5, 1073741824, 1984, 5, 1073741824, 1984, 5, 1073741824, 3264, 5, 1431633920, 3264, 5, 1431633920, 3264, 5, 1431633920, 3264, 5, 1431633920, 3264, 5, 1431633920, 3264, 5, 1431633920, 3264, 5, 1431633920, 3264, 5, 1431633920, 3264, 5, 1431633920, 3264, 5, 1431633920, 3280, 5, 1431633920, 3280, 5, 1431633920, 3280, 5, 1431633920, 3280, 5, 1431633920, 3280, 5, 1431633920, 3280, 5, 1431633920, 3280, 5, 1431633920, 3280, 5, 1431633920, 3280, 5, 1431633920, 3280, 5, 1431633920, 3840, 5, 1431633920, 3840, 5, 1431633920, 3840, 5, 1431633920, 3840, 5, 1431633920, 3840, 5, 1431633920, 3840, 5, 1431633920, 3840, 5, 1431633920, 3840, 5, 1431633920, 3840, 5, 1431633920, 3840, 5, 1431633920, 3856, 5, 1431633920, 3856, 5, 1431633920, 3856, 5, 1431633920, 3856, 5, 1431633920, 3856, 5, 1431633920, 3856, 5, 
1431633920, 3856, 5, 1431633920, 3856, 5, 1431633920, 3856, 5, 1431633920, 3856, 5, 1431633920, 7936, 5, 1431568384, 7936, 5, 1431568384, 7936, 5, 1431568384, 7936, 5, 1431568384, 7936, 5, 1431568384, 7936, 5, 1431568384, 7936, 5, 1431568384, 7936, 5, 1431568384, 7936, 5, 1431568384] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756464159450876339_767_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756464159450876339_767_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..260d7638 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756464159450876339_767_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,267 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((57 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((76 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 2)) { + break; + } + } + } + case 3: { + if ((WaveGetLaneIndex() == 12)) { + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + 
uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((142 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((170 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 34))) { + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 
22)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (213 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((259 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((276 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() >= 59)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((293 << 6) | (i4 
<< 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 57)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((300 << 6) | (i4 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 41))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((315 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 1)) { + continue; + } + } + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (337 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [17664, 0, 1, 17680, 0, 1, 576, 17, 0, 576, 17, 0, 2192, 1145324612, 1145324612, 2192, 1145324612, 1145324612, 2192, 1145324612, 1145324612, 2192, 1145324612, 1145324612, 2192, 1145324612, 1145324612, 2192, 1145324612, 1145324612, 2192, 1145324612, 1145324612, 2192, 1145324612, 1145324612, 2192, 1145324612, 1145324612, 2192, 1145324612, 1145324612, 2192, 1145324612, 1145324612, 
2192, 1145324612, 1145324612, 2192, 1145324612, 1145324612, 2192, 1145324612, 1145324612, 2192, 1145324612, 1145324612, 2192, 1145324612, 1145324612, 2208, 1145324612, 1145324612, 2208, 1145324612, 1145324612, 2208, 1145324612, 1145324612, 2208, 1145324612, 1145324612, 2208, 1145324612, 1145324612, 2208, 1145324612, 1145324612, 2208, 1145324612, 1145324612, 2208, 1145324612, 1145324612, 2208, 1145324612, 1145324612, 2208, 1145324612, 1145324612, 2208, 1145324612, 1145324612, 2208, 1145324612, 1145324612, 2208, 1145324612, 1145324612, 2208, 1145324612, 1145324612, 2208, 1145324612, 1145324612, 2208, 1145324612, 1145324612, 3664, 0, 262144, 3668, 0, 262144, 3680, 0, 262144, 3684, 0, 262144, 4880, 536870912, 32, 4880, 536870912, 32, 4884, 536870912, 32, 4884, 536870912, 32, 4896, 536870912, 32, 4896, 536870912, 32, 4900, 536870912, 32, 4900, 536870912, 32] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756464178784622318_768_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756464178784622318_768_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..27711727 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756464178784622318_768_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,266 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((122 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((155 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 48))) { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((195 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (206 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 324 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [9920, 8192, 256, 9920, 8192, 256, 9936, 8192, 256, 9936, 8192, 256, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 
2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 10240, 2004318071, 2004318071, 11392, 2184, 2147483648, 11392, 2184, 2147483648, 11392, 2184, 2147483648, 11392, 2184, 2147483648, 12480, 2184, 2290614272, 12480, 2184, 2290614272, 12480, 2184, 2290614272, 12480, 2184, 2290614272, 12480, 2184, 2290614272, 12480, 2184, 2290614272, 12480, 2184, 2290614272, 12496, 2184, 2290614272, 12496, 2184, 2290614272, 12496, 2184, 2290614272, 12496, 2184, 2290614272, 12496, 2184, 2290614272, 12496, 2184, 2290614272, 12496, 2184, 2290614272, 13184, 0, 2290614272, 13184, 0, 2290614272, 13184, 0, 2290614272, 13184, 0, 2290614272, 576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 8, 0, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2752, 85, 0, 2752, 85, 0, 2752, 85, 0, 2752, 85, 0, 3392, 8, 0, 5696, 545392672, 136348168, 5696, 545392672, 136348168, 5696, 545392672, 136348168, 5696, 545392672, 136348168, 5696, 545392672, 136348168, 5696, 545392672, 136348168, 5696, 545392672, 136348168, 5696, 545392672, 136348168, 
5696, 545392672, 136348168, 5696, 545392672, 136348168, 6336, 17, 0, 6336, 17, 0, 7808, 0, 1048576, 7824, 0, 1048576] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756464302795304830_772_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756464302795304830_772_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7f6ae6c0 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756464302795304830_772_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,163 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 25))) { + if ((((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 12))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((92 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 
52))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((115 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((126 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (135 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 48 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3024, 4096, 4194320, 3024, 4096, 4194320, 3024, 4096, 4194320, 3040, 4096, 4194320, 3040, 4096, 4194320, 3040, 4096, 4194320, 3056, 4096, 4194320, 3056, 4096, 4194320, 3056, 4096, 4194320, 8080, 0, 4194304, 8096, 0, 4194304, 8112, 0, 4194304] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + 
- Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756464414695971986_774_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756464414695971986_774_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..90a7e31c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756464414695971986_774_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,191 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 59))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 36)) || 
(WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 34)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 57)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 63)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 14)) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((133 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + 
} + if ((WaveGetLaneIndex() == 33)) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 51))) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 93 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5056, 0, 1431655764, 5056, 0, 1431655764, 5056, 0, 1431655764, 5056, 0, 1431655764, 5056, 0, 1431655764, 5056, 0, 1431655764, 5056, 0, 1431655764, 5056, 0, 1431655764, 5056, 0, 
1431655764, 5056, 0, 1431655764, 5056, 0, 1431655764, 5056, 0, 1431655764, 5056, 0, 1431655764, 5056, 0, 1431655764, 5056, 0, 1431655764, 5968, 0, 1409286144, 5968, 0, 1409286144, 5968, 0, 1409286144, 5984, 0, 1409286144, 5984, 0, 1409286144, 5984, 0, 1409286144, 10112, 2048, 128, 10112, 2048, 128, 11392, 10, 2852126720, 11392, 10, 2852126720, 11392, 10, 2852126720, 11392, 10, 2852126720, 11392, 10, 2852126720, 11392, 10, 2852126720, 12352, 32, 2147483648, 12352, 32, 2147483648] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756464416963147272_775_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756464416963147272_775_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7db93120 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756464416963147272_775_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,197 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 22))) { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 49)) { + if ((WaveGetLaneIndex() == 57)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((114 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((125 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 48)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 32)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 43)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((155 << 6) | (i1 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 246 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 585, 0, 1040, 585, 0, 1040, 585, 0, 1040, 585, 0, 1056, 585, 0, 1056, 585, 0, 1056, 585, 0, 1056, 585, 0, 1936, 136348168, 2181570690, 1936, 136348168, 2181570690, 1936, 136348168, 2181570690, 1936, 136348168, 2181570690, 1936, 136348168, 2181570690, 1936, 136348168, 2181570690, 1936, 136348168, 2181570690, 1936, 136348168, 2181570690, 1936, 136348168, 2181570690, 1936, 136348168, 2181570690, 1936, 136348168, 2181570690, 1952, 136348168, 2181570690, 1952, 136348168, 2181570690, 1952, 136348168, 2181570690, 1952, 136348168, 2181570690, 1952, 136348168, 2181570690, 1952, 136348168, 2181570690, 1952, 136348168, 2181570690, 1952, 136348168, 2181570690, 1952, 136348168, 2181570690, 1952, 136348168, 2181570690, 1952, 136348168, 2181570690, 4160, 272696336, 68174084, 4160, 272696336, 68174084, 4160, 272696336, 68174084, 4160, 272696336, 68174084, 4160, 272696336, 68174084, 4160, 272696336, 68174084, 4160, 272696336, 68174084, 4160, 272696336, 68174084, 4160, 272696336, 68174084, 4160, 272696336, 68174084, 5248, 545392672, 136348168, 5248, 545392672, 136348168, 5248, 545392672, 136348168, 5248, 545392672, 136348168, 5248, 545392672, 
136348168, 5248, 545392672, 136348168, 5248, 545392672, 136348168, 5248, 545392672, 136348168, 5248, 545392672, 136348168, 5248, 545392672, 136348168, 5264, 545392672, 136348168, 5264, 545392672, 136348168, 5264, 545392672, 136348168, 5264, 545392672, 136348168, 5264, 545392672, 136348168, 5264, 545392672, 136348168, 5264, 545392672, 136348168, 5264, 545392672, 136348168, 5264, 545392672, 136348168, 5264, 545392672, 136348168, 9024, 0, 1, 9040, 0, 1, 10496, 545392672, 136348168, 10496, 545392672, 136348168, 10496, 545392672, 136348168, 10496, 545392672, 136348168, 10496, 545392672, 136348168, 10496, 545392672, 136348168, 10496, 545392672, 136348168, 10496, 545392672, 136348168, 10496, 545392672, 136348168, 10496, 545392672, 136348168, 10512, 545392672, 136348168, 10512, 545392672, 136348168, 10512, 545392672, 136348168, 10512, 545392672, 136348168, 10512, 545392672, 136348168, 10512, 545392672, 136348168, 10512, 545392672, 136348168, 10512, 545392672, 136348168, 10512, 545392672, 136348168, 10512, 545392672, 136348168] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756464424092181343_776_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756464424092181343_776_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b4d0d185 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756464424092181343_776_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,362 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 15))) { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 45))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 35))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 24))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 40))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((243 << 6) | (counter2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 34)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((250 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 60))) { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 10)) { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((290 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((300 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + 
result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((309 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((314 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((321 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((328 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((335 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((354 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 37)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((361 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (372 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (382 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (391 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 564 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 7296, 65538, 0, 7296, 65538, 0, 9728, 524288, 0, 11344, 4, 1073741824, 11344, 4, 1073741824, 11360, 4, 1073741824, 11360, 4, 1073741824, 15568, 4, 1227128832, 15568, 4, 1227128832, 15568, 4, 1227128832, 15568, 4, 1227128832, 15568, 4, 
1227128832, 15568, 4, 1227128832, 15568, 4, 1227128832, 15584, 4, 1227128832, 15584, 4, 1227128832, 15584, 4, 1227128832, 15584, 4, 1227128832, 15584, 4, 1227128832, 15584, 4, 1227128832, 15584, 4, 1227128832, 16016, 0, 1227133512, 16016, 0, 1227133512, 16016, 0, 1227133512, 16016, 0, 1227133512, 16016, 0, 1227133512, 16016, 0, 1227133512, 16016, 0, 1227133512, 16016, 0, 1227133512, 16016, 0, 1227133512, 16016, 0, 1227133512, 16032, 0, 1227133512, 16032, 0, 1227133512, 16032, 0, 1227133512, 16032, 0, 1227133512, 16032, 0, 1227133512, 16032, 0, 1227133512, 16032, 0, 1227133512, 16032, 0, 1227133512, 16032, 0, 1227133512, 16032, 0, 1227133512, 17408, 1023, 4026531840, 17408, 1023, 4026531840, 17408, 1023, 4026531840, 17408, 1023, 4026531840, 17408, 1023, 4026531840, 17408, 1023, 4026531840, 17408, 1023, 4026531840, 17408, 1023, 4026531840, 17408, 1023, 4026531840, 17408, 1023, 4026531840, 17408, 1023, 4026531840, 17408, 1023, 4026531840, 17408, 1023, 4026531840, 17408, 1023, 4026531840, 22656, 1023, 4026531840, 22656, 1023, 4026531840, 22656, 1023, 4026531840, 22656, 1023, 4026531840, 22656, 1023, 4026531840, 22656, 1023, 4026531840, 22656, 1023, 4026531840, 22656, 1023, 4026531840, 22656, 1023, 4026531840, 22656, 1023, 4026531840, 22656, 1023, 4026531840, 22656, 1023, 4026531840, 22656, 1023, 4026531840, 22656, 1023, 4026531840, 22660, 1023, 4026531840, 22660, 1023, 4026531840, 22660, 1023, 4026531840, 22660, 1023, 4026531840, 22660, 1023, 4026531840, 22660, 1023, 4026531840, 22660, 1023, 4026531840, 22660, 1023, 4026531840, 22660, 1023, 4026531840, 22660, 1023, 4026531840, 22660, 1023, 4026531840, 22660, 1023, 4026531840, 22660, 1023, 4026531840, 22660, 1023, 4026531840, 22664, 1023, 4026531840, 22664, 1023, 4026531840, 22664, 1023, 4026531840, 22664, 1023, 4026531840, 22664, 1023, 4026531840, 22664, 1023, 4026531840, 22664, 1023, 4026531840, 22664, 1023, 4026531840, 22664, 1023, 4026531840, 22664, 1023, 4026531840, 22664, 1023, 4026531840, 22664, 1023, 4026531840, 
22664, 1023, 4026531840, 22664, 1023, 4026531840, 22672, 1023, 4026531840, 22672, 1023, 4026531840, 22672, 1023, 4026531840, 22672, 1023, 4026531840, 22672, 1023, 4026531840, 22672, 1023, 4026531840, 22672, 1023, 4026531840, 22672, 1023, 4026531840, 22672, 1023, 4026531840, 22672, 1023, 4026531840, 22672, 1023, 4026531840, 22672, 1023, 4026531840, 22672, 1023, 4026531840, 22672, 1023, 4026531840, 22676, 1023, 4026531840, 22676, 1023, 4026531840, 22676, 1023, 4026531840, 22676, 1023, 4026531840, 22676, 1023, 4026531840, 22676, 1023, 4026531840, 22676, 1023, 4026531840, 22676, 1023, 4026531840, 22676, 1023, 4026531840, 22676, 1023, 4026531840, 22676, 1023, 4026531840, 22676, 1023, 4026531840, 22676, 1023, 4026531840, 22676, 1023, 4026531840, 22680, 1023, 4026531840, 22680, 1023, 4026531840, 22680, 1023, 4026531840, 22680, 1023, 4026531840, 22680, 1023, 4026531840, 22680, 1023, 4026531840, 22680, 1023, 4026531840, 22680, 1023, 4026531840, 22680, 1023, 4026531840, 22680, 1023, 4026531840, 22680, 1023, 4026531840, 22680, 1023, 4026531840, 22680, 1023, 4026531840, 22680, 1023, 4026531840, 23808, 63, 4026531840, 23808, 63, 4026531840, 23808, 63, 4026531840, 23808, 63, 4026531840, 23808, 63, 4026531840, 23808, 63, 4026531840, 23808, 63, 4026531840, 23808, 63, 4026531840, 23808, 63, 4026531840, 23808, 63, 4026531840, 24448, 85, 0, 24448, 85, 0, 24448, 85, 0, 24448, 85, 0, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 
1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765, 25024, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756464434952630135_777_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756464434952630135_777_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bdd55e19 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756464434952630135_777_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,329 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 24)) { + if ((WaveGetLaneIndex() == 50)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 54)) { + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + } else { + if ((WaveGetLaneIndex() < 27)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 56)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((135 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((153 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 42))) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 207 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 6592, 117440511, 0, 7040, 0, 4278190080, 7040, 0, 4278190080, 7040, 0, 4278190080, 7040, 0, 4278190080, 7040, 0, 4278190080, 7040, 0, 4278190080, 7040, 0, 4278190080, 7040, 0, 4278190080, 7680, 17, 0, 7680, 17, 0, 8640, 536870912, 0, 8656, 536870912, 0, 9796, 0, 536870912, 9812, 0, 536870912, 10944, 1145324612, 1145324612, 10944, 1145324612, 1145324612, 10944, 1145324612, 1145324612, 10944, 1145324612, 1145324612, 10944, 1145324612, 1145324612, 10944, 1145324612, 1145324612, 10944, 1145324612, 1145324612, 10944, 1145324612, 1145324612, 10944, 1145324612, 1145324612, 10944, 1145324612, 1145324612, 10944, 1145324612, 1145324612, 10944, 1145324612, 1145324612, 10944, 1145324612, 1145324612, 10944, 1145324612, 1145324612, 10944, 1145324612, 1145324612, 10944, 1145324612, 1145324612, 12288, 8, 2148007936, 12288, 8, 2148007936, 12288, 8, 2148007936, 15616, 8390656, 134250504, 15616, 8390656, 134250504, 15616, 8390656, 134250504, 15616, 8390656, 134250504, 15616, 8390656, 134250504, 16192, 8390656, 134250504, 16192, 8390656, 134250504, 16192, 8390656, 134250504, 16192, 8390656, 134250504, 16192, 8390656, 134250504] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 
+ Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756464498309146488_779_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756464498309146488_779_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9b9bcb85 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756464498309146488_779_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,82 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 59))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- 
+Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 66 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 3, 4160749568, 1088, 3, 4160749568, 1088, 3, 4160749568, 1088, 3, 4160749568, 1088, 3, 4160749568, 1088, 3, 4160749568, 1088, 3, 4160749568, 2304, 15, 2147483648, 2304, 15, 2147483648, 2304, 15, 2147483648, 2304, 15, 2147483648, 2304, 15, 2147483648, 2320, 15, 2147483648, 2320, 15, 2147483648, 2320, 15, 2147483648, 2320, 15, 2147483648, 2320, 15, 2147483648, 2336, 15, 2147483648, 2336, 15, 2147483648, 2336, 15, 2147483648, 2336, 15, 2147483648, 2336, 15, 2147483648] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756464707153442435_781_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756464707153442435_781_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..aab966e5 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756464707153442435_781_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,206 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 27))) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; 
+ _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 45))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 20))) { + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 5)) { + if ((WaveGetLaneIndex() == 46)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 32)) { + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 23)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756464825371005336_784_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756464825371005336_784_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cac51aea --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756464825371005336_784_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of 3-uint records: [site tag, ballot.x, ballot.y] */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the shared write cursor; every lane that reaches a wave-op site advances it by 3 via InterlockedAdd */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* only even lanes below 8 (0, 2, 4, 6) record: tag 9 << 6 = 576 and ballot.x = 0x55 = 85; the odd-lane case tests (lane % 2) == 0 and the default case cannot be selected by (lane % 2), so neither records */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756464825588040313_785_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756464825588040313_785_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5c05d92b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756464825588040313_785_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,376 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of 3-uint records: [site tag, ballot.x, ballot.y]; the tag is (site id << 6) OR-ed with enclosing loop counters at bit 4 and bit 2 */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the shared write cursor; every lane that reaches a wave-op site advances it by 3 via InterlockedAdd */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 34)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((34 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 40)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((78 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 57)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((87 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i2 == 2)) { + break; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 55))) { + if (((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 37))) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 15) || 
(WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 63))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((200 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 50)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 41)) || 
(WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 0))) { + if ((((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (241 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((264 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 6)) { + for (uint i6 = 0; (i6 < 3); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((282 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 32)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((289 << 6) | (i5 << 4)) | (i6 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 1)) { + continue; + } + if ((i6 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 50)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((302 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((325 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((335 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((344 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((369 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((388 << 6) | (i5 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (407 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (430 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (440 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (449 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (453 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + 
+#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 141 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3584, 87381, 1430257664, 3584, 87381, 1430257664, 3584, 87381, 1430257664, 3584, 87381, 1430257664, 3584, 87381, 1430257664, 3584, 87381, 1430257664, 3584, 87381, 1430257664, 3584, 87381, 1430257664, 3584, 87381, 1430257664, 3584, 87381, 1430257664, 3584, 87381, 1430257664, 3584, 87381, 1430257664, 3584, 87381, 1430257664, 3584, 87381, 1430257664, 4996, 0, 256, 5000, 0, 256, 5012, 0, 256, 5016, 0, 256, 5028, 0, 256, 5032, 0, 256, 11648, 2080, 0, 11648, 2080, 0, 11664, 2080, 0, 11664, 2080, 0, 11680, 2080, 0, 11680, 2080, 0, 12800, 32, 0, 12816, 32, 0, 12832, 32, 0, 13248, 0, 136314880, 13248, 0, 136314880, 13264, 0, 136314880, 13264, 0, 136314880, 13280, 0, 136314880, 13280, 0, 136314880, 16896, 1024, 0, 16912, 1024, 0, 21440, 1, 0, 21456, 1, 0, 27520, 16779264, 536903680, 27520, 16779264, 536903680, 27520, 16779264, 536903680, 27520, 16779264, 536903680, 28160, 85, 0, 28160, 85, 0, 28160, 85, 0, 28160, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756465104707477143_787_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756465104707477143_787_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e6335673 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756465104707477143_787_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,146 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of 3-uint records: [site tag, ballot.x, ballot.y] */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the shared write cursor; every lane that reaches a wave-op site advances it by 3 via InterlockedAdd */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 49))) { + if (((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 29)) { + if ((WaveGetLaneIndex() >= 40)) { + result = (result + 
WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((84 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((95 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 114 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 3856, 0, 2290649088, 3856, 0, 2290649088, 3856, 0, 
2290649088, 3856, 0, 2290649088, 3856, 0, 2290649088, 3856, 0, 2290649088, 3872, 0, 2290649088, 3872, 0, 2290649088, 3872, 0, 2290649088, 3872, 0, 2290649088, 3872, 0, 2290649088, 3872, 0, 2290649088, 3888, 0, 2290649088, 3888, 0, 2290649088, 3888, 0, 2290649088, 3888, 0, 2290649088, 3888, 0, 2290649088, 3888, 0, 2290649088, 5396, 32768, 0, 5400, 32768, 0, 5412, 32768, 0, 5416, 32768, 0, 5428, 32768, 0, 5432, 32768, 0, 6100, 32768, 134217728, 6100, 32768, 134217728, 6104, 32768, 134217728, 6104, 32768, 134217728, 6116, 32768, 134217728, 6116, 32768, 134217728, 6120, 32768, 134217728, 6120, 32768, 134217728, 6132, 32768, 134217728, 6132, 32768, 134217728, 6136, 32768, 134217728, 6136, 32768, 134217728] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756465219262631129_790_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756465219262631129_790_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3620b617 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756465219262631129_790_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,76 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of 3-uint records: [site tag, ballot.x, ballot.y] */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the shared write cursor; every lane that reaches a wave-op site advances it by 3 via InterlockedAdd */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* two record sites: lanes 11, 21 and 51 write tag 21 << 6 = 1344 (3 records); the other lanes at or above 46 write tag 17 << 6 = 1088 (17 records); 20 records x 3 uints matches Size: 60 */ + uint result = 0; + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 46)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 60 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 2099200, 524288, 1344, 2099200, 524288, 1344, 2099200, 524288, 1088, 0, 4294426624, 1088, 0, 4294426624, 1088, 0, 4294426624, 1088, 0, 4294426624, 1088, 0, 4294426624, 1088, 0, 4294426624, 1088, 0, 4294426624, 1088, 0, 4294426624, 1088, 0, 
4294426624, 1088, 0, 4294426624, 1088, 0, 4294426624, 1088, 0, 4294426624, 1088, 0, 4294426624, 1088, 0, 4294426624, 1088, 0, 4294426624, 1088, 0, 4294426624, 1088, 0, 4294426624] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756465219519793325_791_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756465219519793325_791_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..328b2ac0 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756465219519793325_791_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,151 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* log of 3-uint records: [site tag, ballot.x, ballot.y] */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the shared write cursor; every lane that reaches a wave-op site advances it by 3 via InterlockedAdd */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 23)) || 
(WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 165 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 
2863311530, 3584, 2863311530, 2863311530, 3584, 2863311530, 2863311530, 3328, 4096, 1073741840, 3328, 4096, 1073741840, 3328, 4096, 1073741840, 2816, 0, 65536, 2560, 1365, 0, 2560, 1365, 0, 2560, 1365, 0, 2560, 1365, 0, 2560, 1365, 0, 2560, 1365, 0, 4224, 85, 0, 4224, 85, 0, 4224, 85, 0, 4224, 85, 0, 7744, 8389120, 8405000, 7744, 8389120, 8405000, 7744, 8389120, 8405000, 7744, 8389120, 8405000, 7744, 8389120, 8405000, 7360, 528384, 2097664, 7360, 528384, 2097664, 7360, 528384, 2097664, 7360, 528384, 2097664] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756465220825357640_792_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756465220825357640_792_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..53c93eda --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756465220825357640_792_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 
102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756465221172756605_793_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756465221172756605_793_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..17a863a1 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756465221172756605_793_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,238 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 52))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 24)) { + if ((WaveGetLaneIndex() >= 55)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (119 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 43)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 5) || 
(WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((183 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if ((i1 == 1)) { + continue; + } + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 195 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3648, 64, 0, 3664, 64, 0, 4352, 64, 0, 4368, 64, 0, 6336, 1, 0, 6976, 1, 0, 8896, 272696336, 68174084, 8896, 272696336, 68174084, 8896, 272696336, 68174084, 8896, 272696336, 68174084, 8896, 272696336, 68174084, 8896, 272696336, 68174084, 8896, 272696336, 68174084, 8896, 272696336, 68174084, 8896, 272696336, 68174084, 8896, 272696336, 68174084, 9536, 4, 0, 11712, 4, 1227132928, 11712, 4, 1227132928, 11712, 4, 1227132928, 11712, 4, 1227132928, 11712, 4, 1227132928, 11712, 4, 1227132928, 11712, 4, 1227132928, 11712, 4, 1227132928, 11716, 4, 1227132928, 11716, 4, 1227132928, 11716, 4, 1227132928, 11716, 4, 1227132928, 11716, 4, 1227132928, 11716, 4, 1227132928, 11716, 4, 1227132928, 11716, 4, 1227132928, 11720, 4, 1227132928, 11720, 4, 1227132928, 11720, 4, 1227132928, 11720, 4, 1227132928, 11720, 4, 1227132928, 11720, 4, 1227132928, 11720, 4, 1227132928, 11720, 4, 1227132928, 11728, 4, 1227132928, 11728, 4, 1227132928, 11728, 4, 1227132928, 11728, 4, 1227132928, 11728, 4, 1227132928, 11728, 4, 1227132928, 11728, 4, 1227132928, 11728, 4, 1227132928, 11732, 4, 1227132928, 11732, 4, 1227132928, 11732, 4, 1227132928, 11732, 4, 1227132928, 11732, 4, 1227132928, 11732, 4, 1227132928, 11732, 4, 1227132928, 11732, 4, 1227132928, 11736, 4, 1227132928, 11736, 4, 
1227132928, 11736, 4, 1227132928, 11736, 4, 1227132928, 11736, 4, 1227132928, 11736, 4, 1227132928, 11736, 4, 1227132928, 11736, 4, 1227132928] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756465604166506219_797_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756465604166506219_797_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..53c93eda --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756465604166506219_797_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756465604447082944_798_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756465604447082944_798_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..07d97a36 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756465604447082944_798_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,243 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 29)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((31 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 36)) { + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((43 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 60)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 60))) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 61)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 54))) { + if 
(((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 44))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 5))) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 
59)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() == 60)) { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (244 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 59)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 534 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 153391689, 0, 1040, 153391689, 0, 1040, 153391689, 0, 1040, 153391689, 0, 1040, 153391689, 0, 1040, 153391689, 0, 1040, 153391689, 0, 1040, 153391689, 0, 1040, 153391689, 0, 1040, 153391689, 0, 1056, 153391689, 0, 1056, 153391689, 0, 1056, 153391689, 0, 1056, 153391689, 0, 1056, 153391689, 0, 1056, 153391689, 0, 1056, 153391689, 0, 1056, 153391689, 0, 1056, 153391689, 0, 1056, 
153391689, 0, 1072, 153391689, 0, 1072, 153391689, 0, 1072, 153391689, 0, 1072, 153391689, 0, 1072, 153391689, 0, 1072, 153391689, 0, 1072, 153391689, 0, 1072, 153391689, 0, 1072, 153391689, 0, 1072, 153391689, 0, 2000, 0, 2454257664, 2000, 0, 2454257664, 2000, 0, 2454257664, 2000, 0, 2454257664, 2000, 0, 2454257664, 2000, 0, 2454257664, 2004, 0, 2454257664, 2004, 0, 2454257664, 2004, 0, 2454257664, 2004, 0, 2454257664, 2004, 0, 2454257664, 2004, 0, 2454257664, 2008, 0, 2454257664, 2008, 0, 2454257664, 2008, 0, 2454257664, 2008, 0, 2454257664, 2008, 0, 2454257664, 2008, 0, 2454257664, 2016, 0, 2454257664, 2016, 0, 2454257664, 2016, 0, 2454257664, 2016, 0, 2454257664, 2016, 0, 2454257664, 2016, 0, 2454257664, 2020, 0, 2454257664, 2020, 0, 2454257664, 2020, 0, 2454257664, 2020, 0, 2454257664, 2020, 0, 2454257664, 2020, 0, 2454257664, 2024, 0, 2454257664, 2024, 0, 2454257664, 2024, 0, 2454257664, 2024, 0, 2454257664, 2024, 0, 2454257664, 2024, 0, 2454257664, 2032, 0, 2454257664, 2032, 0, 2454257664, 2032, 0, 2454257664, 2032, 0, 2454257664, 2032, 0, 2454257664, 2032, 0, 2454257664, 2036, 0, 2454257664, 2036, 0, 2454257664, 2036, 0, 2454257664, 2036, 0, 2454257664, 2036, 0, 2454257664, 2036, 0, 2454257664, 2040, 0, 2454257664, 2040, 0, 2454257664, 2040, 0, 2454257664, 2040, 0, 2454257664, 2040, 0, 2454257664, 2040, 0, 2454257664, 3216, 585, 0, 3216, 585, 0, 3216, 585, 0, 3216, 585, 0, 3220, 585, 0, 3220, 585, 0, 3220, 585, 0, 3220, 585, 0, 3224, 585, 0, 3224, 585, 0, 3224, 585, 0, 3224, 585, 0, 3232, 585, 0, 3232, 585, 0, 3232, 585, 0, 3232, 585, 0, 3236, 585, 0, 3236, 585, 0, 3236, 585, 0, 3236, 585, 0, 3240, 585, 0, 3240, 585, 0, 3240, 585, 0, 3240, 585, 0, 3248, 585, 0, 3248, 585, 0, 3248, 585, 0, 3248, 585, 0, 3252, 585, 0, 3252, 585, 0, 3252, 585, 0, 3252, 585, 0, 3256, 585, 0, 3256, 585, 0, 3256, 585, 0, 3256, 585, 0, 3856, 0, 2415919104, 3856, 0, 2415919104, 3872, 0, 2415919104, 3872, 0, 2415919104, 3888, 0, 2415919104, 3888, 0, 2415919104, 9024, 272696336, 
68174084, 9024, 272696336, 68174084, 9024, 272696336, 68174084, 9024, 272696336, 68174084, 9024, 272696336, 68174084, 9024, 272696336, 68174084, 9024, 272696336, 68174084, 9024, 272696336, 68174084, 9024, 272696336, 68174084, 9024, 272696336, 68174084, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269, 16384, 3067833782, 1840700269] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756465898697784352_801_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756465898697784352_801_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e48ee6ef --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756465898697784352_801_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,348 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 49))) { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 47)) { + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 3: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + 
} + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 43))) { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 14))) { + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((233 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((244 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 57))) { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 61))) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (288 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 39)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((302 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((327 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + if ((i4 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() >= 34)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((342 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 1)) { + break; + } + } + if 
((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (360 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 876 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 5888, 1145324612, 1145324612, 5888, 1145324612, 1145324612, 5888, 1145324612, 1145324612, 5888, 1145324612, 1145324612, 5888, 1145324612, 1145324612, 5888, 1145324612, 1145324612, 5888, 1145324612, 1145324612, 5888, 1145324612, 1145324612, 5888, 1145324612, 1145324612, 5888, 1145324612, 1145324612, 5888, 1145324612, 1145324612, 5888, 1145324612, 1145324612, 5888, 1145324612, 1145324612, 5888, 1145324612, 1145324612, 5888, 1145324612, 1145324612, 5888, 1145324612, 1145324612, 5904, 1145324612, 1145324612, 5904, 1145324612, 1145324612, 5904, 1145324612, 1145324612, 5904, 1145324612, 1145324612, 5904, 1145324612, 1145324612, 5904, 1145324612, 1145324612, 5904, 1145324612, 1145324612, 5904, 1145324612, 1145324612, 5904, 1145324612, 1145324612, 5904, 1145324612, 1145324612, 5904, 1145324612, 1145324612, 5904, 1145324612, 1145324612, 5904, 1145324612, 1145324612, 5904, 1145324612, 1145324612, 5904, 1145324612, 1145324612, 5904, 1145324612, 1145324612, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 
2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6976, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6980, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 
2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6992, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 6996, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 
2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7680, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 7696, 2863311530, 2863311530, 9280, 1, 0, 9856, 268501008, 1048832, 9856, 268501008, 1048832, 9856, 268501008, 1048832, 9856, 268501008, 1048832, 9856, 268501008, 1048832, 10176, 1048832, 16781313, 10176, 1048832, 16781313, 10176, 1048832, 16781313, 10176, 1048832, 16781313, 10176, 1048832, 16781313, 10752, 286331153, 286331153, 10752, 286331153, 286331153, 10752, 286331153, 286331153, 10752, 286331153, 286331153, 10752, 286331153, 286331153, 10752, 286331153, 286331153, 10752, 286331153, 286331153, 10752, 286331153, 286331153, 10752, 286331153, 286331153, 10752, 
286331153, 286331153, 10752, 286331153, 286331153, 10752, 286331153, 286331153, 10752, 286331153, 286331153, 10752, 286331153, 286331153, 10752, 286331153, 286331153, 10752, 286331153, 286331153, 11904, 8738, 0, 11904, 8738, 0, 11904, 8738, 0, 11904, 8738, 0, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 15936, 1145324612, 1145324612, 19344, 0, 2290649216, 19344, 0, 2290649216, 19344, 0, 2290649216, 19344, 0, 2290649216, 19344, 0, 2290649216, 19344, 0, 2290649216, 19344, 0, 2290649216, 20944, 32768, 0, 20948, 32768, 0, 20952, 32768, 0, 21904, 0, 2290649224, 21904, 0, 2290649224, 21904, 0, 2290649224, 21904, 0, 2290649224, 21904, 0, 2290649224, 21904, 0, 2290649224, 21904, 0, 2290649224, 21904, 0, 2290649224, 23040, 32768, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756465939333865569_802_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756465939333865569_802_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8f5e36c0 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756465939333865569_802_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,142 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 27)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() == 62)) { + result = (result + 
WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3984, 0, 4194304, 4000, 0, 4194304, 4016, 0, 4194304] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756466001700741189_805_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756466001700741189_805_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cac51aea --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756466001700741189_805_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756466001929026563_806_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756466001929026563_806_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..83950e2f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756466001929026563_806_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,191 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 52))) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((78 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((89 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((106 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 
60)) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (198 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 60)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 342 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2688, 2621440, 33554432, 2688, 2621440, 33554432, 2688, 2621440, 33554432, 2704, 2621440, 33554432, 2704, 2621440, 33554432, 2704, 2621440, 33554432, 4996, 10, 2684354560, 4996, 10, 2684354560, 4996, 10, 2684354560, 4996, 10, 2684354560, 5000, 10, 2684354560, 5000, 10, 2684354560, 5000, 10, 2684354560, 5000, 10, 2684354560, 5004, 10, 2684354560, 5004, 10, 2684354560, 5004, 10, 2684354560, 5004, 10, 2684354560, 5012, 10, 2684354560, 5012, 10, 2684354560, 5012, 10, 2684354560, 5012, 10, 2684354560, 5016, 10, 2684354560, 5016, 10, 2684354560, 5016, 10, 2684354560, 5016, 10, 2684354560, 5020, 10, 2684354560, 5020, 10, 2684354560, 5020, 10, 2684354560, 5020, 10, 2684354560, 5700, 0, 2862612480, 5700, 0, 2862612480, 5700, 0, 2862612480, 5700, 0, 2862612480, 5700, 0, 2862612480, 5700, 0, 2862612480, 5704, 0, 2862612480, 5704, 0, 2862612480, 5704, 0, 2862612480, 5704, 0, 2862612480, 5704, 0, 2862612480, 5704, 0, 2862612480, 5708, 0, 2862612480, 5708, 0, 2862612480, 5708, 0, 2862612480, 5708, 0, 2862612480, 5708, 0, 2862612480, 5708, 0, 2862612480, 5716, 0, 2862612480, 5716, 0, 2862612480, 5716, 0, 2862612480, 5716, 0, 2862612480, 5716, 0, 2862612480, 5716, 0, 2862612480, 5720, 0, 2862612480, 
5720, 0, 2862612480, 5720, 0, 2862612480, 5720, 0, 2862612480, 5720, 0, 2862612480, 5720, 0, 2862612480, 5724, 0, 2862612480, 5724, 0, 2862612480, 5724, 0, 2862612480, 5724, 0, 2862612480, 5724, 0, 2862612480, 5724, 0, 2862612480, 6788, 2097152, 524288, 6788, 2097152, 524288, 6792, 2097152, 524288, 6792, 2097152, 524288, 6796, 2097152, 524288, 6796, 2097152, 524288, 6804, 2097152, 524288, 6804, 2097152, 524288, 6808, 2097152, 524288, 6808, 2097152, 524288, 6812, 2097152, 524288, 6812, 2097152, 524288, 12672, 8, 0, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12288, 65527, 4294901760, 12032, 4194304, 4096, 12032, 4194304, 4096, 11776, 0, 33, 11776, 0, 33] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756466034003904208_807_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756466034003904208_807_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5c19638c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756466034003904208_807_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,166 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 48)) { + if ((WaveGetLaneIndex() == 49)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter1 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 56)) { + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3344, 0, 67108864, 5136, 18, 613548032, 5136, 18, 613548032, 5136, 18, 613548032, 5136, 18, 613548032, 5136, 18, 613548032, 5136, 18, 613548032, 5136, 18, 613548032, 5584, 128, 0, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513, 6080, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756466034736425750_808_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756466034736425750_808_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f412b81c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756466034736425750_808_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,120 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 47))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 56)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (45 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1872, 1, 0, 1888, 1, 0, 1904, 1, 0, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 2880, 272696336, 68174084, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513, 3200, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 
+ Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756466348681595334_810_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756466348681595334_810_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8981b649 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756466348681595334_810_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,318 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 34))) { + if ((((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (90 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result 
+ WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 57)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((128 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 41)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((135 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 36)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 52)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 48))) { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((227 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 28)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((249 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (256 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (260 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 393 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 8, 0, 4544, 8521760, 136348168, 4544, 8521760, 136348168, 4544, 8521760, 136348168, 4544, 8521760, 136348168, 4544, 8521760, 136348168, 4544, 8521760, 136348168, 4544, 8521760, 136348168, 4544, 8521760, 136348168, 4544, 8521760, 136348168, 9088, 0, 16, 9104, 
0, 16, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9664, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 9680, 1431655765, 1431655765, 11072, 17, 0, 11072, 17, 0, 12560, 0, 1145044992, 12560, 0, 1145044992, 
12560, 0, 1145044992, 12576, 0, 1145044992, 12576, 0, 1145044992, 12576, 0, 1145044992, 13840, 17476, 1140850688, 13840, 17476, 1140850688, 13840, 17476, 1140850688, 13840, 17476, 1140850688, 13840, 17476, 1140850688, 13840, 17476, 1140850688, 13856, 17476, 1140850688, 13856, 17476, 1140850688, 13856, 17476, 1140850688, 13856, 17476, 1140850688, 13856, 17476, 1140850688, 13856, 17476, 1140850688, 14544, 68, 1145307136, 14544, 68, 1145307136, 14544, 68, 1145307136, 14544, 68, 1145307136, 14544, 68, 1145307136, 14544, 68, 1145307136, 14560, 68, 1145307136, 14560, 68, 1145307136, 14560, 68, 1145307136, 14560, 68, 1145307136, 14560, 68, 1145307136, 14560, 68, 1145307136, 15952, 71582788, 0, 15952, 71582788, 0, 15952, 71582788, 0, 15952, 71582788, 0, 15952, 71582788, 0, 15952, 71582788, 0, 15952, 71582788, 0, 15968, 71582788, 0, 15968, 71582788, 0, 15968, 71582788, 0, 15968, 71582788, 0, 15968, 71582788, 0, 15968, 71582788, 0, 15968, 71582788, 0, 16384, 559240, 0, 16384, 559240, 0, 16384, 559240, 0, 16384, 559240, 0, 16384, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756466431050150960_812_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756466431050150960_812_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..464be66f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756466431050150960_812_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,154 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (16 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((50 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((74 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((85 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + if ((i0 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + 
Format: UInt32 + Stride: 4 + Fill: 0 + Size: 630 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1536, 2, 0, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1280, 1431655765, 1431655765, 1024, 168, 0, 1024, 168, 0, 1024, 168, 0, 3204, 8191, 3221225472, 3204, 8191, 3221225472, 3204, 8191, 3221225472, 3204, 8191, 3221225472, 3204, 8191, 3221225472, 3204, 8191, 3221225472, 3204, 8191, 3221225472, 3204, 8191, 3221225472, 3204, 8191, 3221225472, 3204, 8191, 3221225472, 3204, 8191, 3221225472, 3204, 8191, 3221225472, 3204, 8191, 3221225472, 3204, 8191, 3221225472, 3204, 8191, 3221225472, 3220, 8191, 3221225472, 3220, 8191, 3221225472, 3220, 8191, 3221225472, 3220, 8191, 3221225472, 3220, 8191, 3221225472, 3220, 8191, 3221225472, 3220, 8191, 3221225472, 3220, 8191, 3221225472, 3220, 8191, 3221225472, 3220, 8191, 3221225472, 3220, 8191, 3221225472, 3220, 8191, 3221225472, 3220, 8191, 3221225472, 3220, 8191, 3221225472, 3220, 8191, 3221225472, 3236, 8191, 3221225472, 3236, 8191, 3221225472, 3236, 8191, 3221225472, 3236, 8191, 3221225472, 3236, 8191, 3221225472, 3236, 8191, 3221225472, 
3236, 8191, 3221225472, 3236, 8191, 3221225472, 3236, 8191, 3221225472, 3236, 8191, 3221225472, 3236, 8191, 3221225472, 3236, 8191, 3221225472, 3236, 8191, 3221225472, 3236, 8191, 3221225472, 3236, 8191, 3221225472, 3844, 73, 0, 3844, 73, 0, 3844, 73, 0, 3860, 73, 0, 3860, 73, 0, 3860, 73, 0, 3876, 73, 0, 3876, 73, 0, 3876, 73, 0, 4420, 272696336, 68174084, 4420, 272696336, 68174084, 4420, 272696336, 68174084, 4420, 272696336, 68174084, 4420, 272696336, 68174084, 4420, 272696336, 68174084, 4420, 272696336, 68174084, 4420, 272696336, 68174084, 4420, 272696336, 68174084, 4420, 272696336, 68174084, 4436, 272696336, 68174084, 4436, 272696336, 68174084, 4436, 272696336, 68174084, 4436, 272696336, 68174084, 4436, 272696336, 68174084, 4436, 272696336, 68174084, 4436, 272696336, 68174084, 4436, 272696336, 68174084, 4436, 272696336, 68174084, 4436, 272696336, 68174084, 4452, 272696336, 68174084, 4452, 272696336, 68174084, 4452, 272696336, 68174084, 4452, 272696336, 68174084, 4452, 272696336, 68174084, 4452, 272696336, 68174084, 4452, 272696336, 68174084, 4452, 272696336, 68174084, 4452, 272696336, 68174084, 4452, 272696336, 68174084, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4740, 613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 
613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 613566756, 1227133513, 4756, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 4772, 613566756, 1227133513, 5444, 31, 4026531840, 5444, 31, 4026531840, 5444, 31, 4026531840, 5444, 31, 4026531840, 5444, 31, 4026531840, 5444, 31, 4026531840, 5444, 31, 4026531840, 5444, 31, 4026531840, 5444, 31, 4026531840, 5460, 31, 4026531840, 5460, 31, 4026531840, 5460, 31, 4026531840, 5460, 31, 4026531840, 5460, 31, 4026531840, 5460, 31, 4026531840, 5460, 31, 4026531840, 5460, 31, 4026531840, 5460, 31, 4026531840, 5476, 31, 4026531840, 5476, 31, 4026531840, 5476, 31, 4026531840, 5476, 31, 4026531840, 5476, 31, 4026531840, 5476, 31, 4026531840, 5476, 31, 4026531840, 5476, 31, 4026531840, 5476, 31, 4026531840] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756466466468010029_813_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756466466468010029_813_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a134eb5a --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756466466468010029_813_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,138 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 7))) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 26))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((85 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((102 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((109 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 828 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4352, 1431655765, 1431655765, 4352, 
1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 4352, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5440, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 
1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 5456, 1431655765, 1431655765, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6528, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 
1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6532, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6544, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6548, 1073741823, 0, 6976, 32767, 0, 6976, 32767, 0, 6976, 32767, 0, 6976, 32767, 0, 6976, 32767, 0, 6976, 32767, 0, 6976, 32767, 0, 6976, 32767, 0, 6976, 32767, 0, 6976, 32767, 0, 6976, 32767, 0, 6976, 32767, 0, 6976, 32767, 0, 6976, 32767, 0, 6976, 32767, 0, 6980, 32767, 0, 6980, 32767, 0, 6980, 32767, 0, 6980, 32767, 0, 6980, 32767, 0, 6980, 32767, 0, 6980, 32767, 0, 6980, 32767, 0, 6980, 32767, 0, 6980, 32767, 0, 6980, 32767, 0, 6980, 32767, 0, 6980, 32767, 0, 6980, 32767, 0, 6980, 32767, 0, 6992, 32767, 0, 6992, 32767, 0, 6992, 32767, 0, 6992, 32767, 0, 6992, 32767, 0, 6992, 32767, 0, 6992, 32767, 0, 6992, 32767, 0, 6992, 
32767, 0, 6992, 32767, 0, 6992, 32767, 0, 6992, 32767, 0, 6992, 32767, 0, 6992, 32767, 0, 6992, 32767, 0, 6996, 32767, 0, 6996, 32767, 0, 6996, 32767, 0, 6996, 32767, 0, 6996, 32767, 0, 6996, 32767, 0, 6996, 32767, 0, 6996, 32767, 0, 6996, 32767, 0, 6996, 32767, 0, 6996, 32767, 0, 6996, 32767, 0, 6996, 32767, 0, 6996, 32767, 0, 6996, 32767, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756466481242296255_814_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756466481242296255_814_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ca54fb1a --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756466481242296255_814_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,238 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: 
{ + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 50))) { + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 50))) { + result = (result 
+ WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 59)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((107 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 47))) { + result = 
(result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 411 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 73, 0, 1792, 73, 0, 1792, 73, 0, 2368, 272696336, 68174084, 2368, 272696336, 68174084, 2368, 272696336, 68174084, 2368, 272696336, 68174084, 2368, 272696336, 68174084, 2368, 272696336, 68174084, 2368, 272696336, 68174084, 2368, 272696336, 68174084, 2368, 272696336, 68174084, 2368, 272696336, 68174084, 2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 
2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 2688, 613566756, 1227133513, 10048, 17, 286330880, 10048, 17, 286330880, 10048, 17, 286330880, 10048, 17, 286330880, 10048, 17, 286330880, 10048, 17, 286330880, 10048, 17, 286330880, 10752, 4369, 286326784, 10752, 4369, 286326784, 10752, 4369, 286326784, 10752, 4369, 286326784, 10752, 4369, 286326784, 10752, 4369, 286326784, 10752, 4369, 286326784, 10752, 4369, 286326784, 11328, 286331153, 286331153, 11328, 286331153, 286331153, 11328, 286331153, 286331153, 11328, 286331153, 286331153, 11328, 286331153, 286331153, 11328, 286331153, 286331153, 11328, 286331153, 286331153, 11328, 286331153, 286331153, 11328, 286331153, 286331153, 11328, 286331153, 286331153, 11328, 286331153, 286331153, 11328, 286331153, 286331153, 11328, 286331153, 286331153, 11328, 286331153, 286331153, 11328, 286331153, 286331153, 11328, 286331153, 286331153, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 
2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 11648, 2004318071, 2004318071, 12096, 1048575, 0, 12096, 1048575, 0, 12096, 1048575, 0, 12096, 1048575, 0, 12096, 1048575, 0, 12096, 1048575, 0, 12096, 1048575, 0, 12096, 1048575, 0, 12096, 1048575, 0, 12096, 1048575, 0, 12096, 1048575, 0, 12096, 1048575, 0, 12096, 1048575, 0, 12096, 1048575, 0, 12096, 1048575, 0, 12096, 1048575, 0, 12096, 1048575, 0, 12096, 1048575, 0, 12096, 1048575, 0, 12096, 1048575, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756466483630442258_815_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756466483630442258_815_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8ba04b0d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756466483630442258_815_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,453 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((21 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 50)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 32)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 13) || 
(WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 2)) { + break; + } + } + } + case 1: { + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 46))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((132 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 9)) { + if ((WaveGetLaneIndex() >= 32)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 39)) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 1)) { + continue; + } + } + } else { + if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 59))) { + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((278 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 54))) { + 
result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (292 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (301 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 49)) { + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (325 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((343 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((354 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter5 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (368 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 12)) { + if ((WaveGetLaneIndex() >= 59)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (378 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 23)) { + if ((WaveGetLaneIndex() == 24)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (388 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 41))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (420 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (434 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (443 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + } + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 56))) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (461 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 23)) { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 46))) { + if ((WaveGetLaneIndex() >= 55)) { + if ((WaveGetLaneIndex() >= 62)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (487 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (498 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + 
DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1233 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 1365, 1073741824, 1344, 1365, 1073741824, 1344, 1365, 1073741824, 1344, 1365, 1073741824, 1344, 1365, 1073741824, 1344, 1365, 1073741824, 1344, 1365, 1073741824, 1360, 1365, 1073741824, 1360, 1365, 1073741824, 1360, 1365, 1073741824, 1360, 1365, 1073741824, 1360, 1365, 1073741824, 1360, 1365, 1073741824, 1360, 1365, 1073741824, 1376, 1365, 1073741824, 1376, 1365, 1073741824, 1376, 1365, 1073741824, 1376, 1365, 1073741824, 1376, 1365, 1073741824, 1376, 1365, 1073741824, 1376, 1365, 1073741824, 2304, 0, 1431568384, 2304, 0, 1431568384, 2304, 0, 1431568384, 2304, 0, 1431568384, 2304, 0, 1431568384, 2304, 0, 1431568384, 2304, 0, 1431568384, 2308, 0, 1431568384, 2308, 0, 1431568384, 2308, 0, 1431568384, 2308, 0, 1431568384, 2308, 0, 1431568384, 2308, 0, 1431568384, 2308, 0, 1431568384, 2312, 0, 1431568384, 2312, 0, 1431568384, 2312, 0, 1431568384, 2312, 0, 1431568384, 2312, 0, 1431568384, 2312, 0, 1431568384, 2312, 0, 1431568384, 2320, 0, 1431568384, 2320, 0, 1431568384, 2320, 0, 1431568384, 2320, 0, 1431568384, 2320, 0, 1431568384, 2320, 0, 1431568384, 2320, 0, 1431568384, 2324, 0, 1431568384, 2324, 0, 1431568384, 2324, 0, 1431568384, 2324, 0, 1431568384, 2324, 0, 1431568384, 2324, 0, 1431568384, 2324, 0, 1431568384, 2328, 0, 1431568384, 2328, 0, 1431568384, 2328, 0, 1431568384, 2328, 0, 1431568384, 2328, 0, 1431568384, 2328, 0, 1431568384, 2328, 0, 1431568384, 2336, 0, 1431568384, 2336, 0, 1431568384, 2336, 0, 1431568384, 2336, 0, 1431568384, 2336, 0, 1431568384, 2336, 0, 1431568384, 2336, 0, 1431568384, 2340, 0, 1431568384, 2340, 0, 1431568384, 2340, 0, 1431568384, 2340, 0, 1431568384, 2340, 0, 1431568384, 2340, 0, 1431568384, 2340, 0, 1431568384, 2344, 0, 1431568384, 2344, 0, 1431568384, 2344, 0, 1431568384, 2344, 0, 1431568384, 2344, 0, 
1431568384, 2344, 0, 1431568384, 2344, 0, 1431568384, 2752, 0, 1431655765, 2752, 0, 1431655765, 2752, 0, 1431655765, 2752, 0, 1431655765, 2752, 0, 1431655765, 2752, 0, 1431655765, 2752, 0, 1431655765, 2752, 0, 1431655765, 2752, 0, 1431655765, 2752, 0, 1431655765, 2752, 0, 1431655765, 2752, 0, 1431655765, 2752, 0, 1431655765, 2752, 0, 1431655765, 2752, 0, 1431655765, 2752, 0, 1431655765, 2756, 0, 1431655765, 2756, 0, 1431655765, 2756, 0, 1431655765, 2756, 0, 1431655765, 2756, 0, 1431655765, 2756, 0, 1431655765, 2756, 0, 1431655765, 2756, 0, 1431655765, 2756, 0, 1431655765, 2756, 0, 1431655765, 2756, 0, 1431655765, 2756, 0, 1431655765, 2756, 0, 1431655765, 2756, 0, 1431655765, 2756, 0, 1431655765, 2756, 0, 1431655765, 2760, 0, 1431655765, 2760, 0, 1431655765, 2760, 0, 1431655765, 2760, 0, 1431655765, 2760, 0, 1431655765, 2760, 0, 1431655765, 2760, 0, 1431655765, 2760, 0, 1431655765, 2760, 0, 1431655765, 2760, 0, 1431655765, 2760, 0, 1431655765, 2760, 0, 1431655765, 2760, 0, 1431655765, 2760, 0, 1431655765, 2760, 0, 1431655765, 2760, 0, 1431655765, 2768, 0, 1431655765, 2768, 0, 1431655765, 2768, 0, 1431655765, 2768, 0, 1431655765, 2768, 0, 1431655765, 2768, 0, 1431655765, 2768, 0, 1431655765, 2768, 0, 1431655765, 2768, 0, 1431655765, 2768, 0, 1431655765, 2768, 0, 1431655765, 2768, 0, 1431655765, 2768, 0, 1431655765, 2768, 0, 1431655765, 2768, 0, 1431655765, 2768, 0, 1431655765, 2772, 0, 1431655765, 2772, 0, 1431655765, 2772, 0, 1431655765, 2772, 0, 1431655765, 2772, 0, 1431655765, 2772, 0, 1431655765, 2772, 0, 1431655765, 2772, 0, 1431655765, 2772, 0, 1431655765, 2772, 0, 1431655765, 2772, 0, 1431655765, 2772, 0, 1431655765, 2772, 0, 1431655765, 2772, 0, 1431655765, 2772, 0, 1431655765, 2772, 0, 1431655765, 2776, 0, 1431655765, 2776, 0, 1431655765, 2776, 0, 1431655765, 2776, 0, 1431655765, 2776, 0, 1431655765, 2776, 0, 1431655765, 2776, 0, 1431655765, 2776, 0, 1431655765, 2776, 0, 1431655765, 2776, 0, 1431655765, 2776, 0, 1431655765, 2776, 0, 1431655765, 2776, 0, 
1431655765, 2776, 0, 1431655765, 2776, 0, 1431655765, 2776, 0, 1431655765, 2784, 0, 1431655765, 2784, 0, 1431655765, 2784, 0, 1431655765, 2784, 0, 1431655765, 2784, 0, 1431655765, 2784, 0, 1431655765, 2784, 0, 1431655765, 2784, 0, 1431655765, 2784, 0, 1431655765, 2784, 0, 1431655765, 2784, 0, 1431655765, 2784, 0, 1431655765, 2784, 0, 1431655765, 2784, 0, 1431655765, 2784, 0, 1431655765, 2784, 0, 1431655765, 2788, 0, 1431655765, 2788, 0, 1431655765, 2788, 0, 1431655765, 2788, 0, 1431655765, 2788, 0, 1431655765, 2788, 0, 1431655765, 2788, 0, 1431655765, 2788, 0, 1431655765, 2788, 0, 1431655765, 2788, 0, 1431655765, 2788, 0, 1431655765, 2788, 0, 1431655765, 2788, 0, 1431655765, 2788, 0, 1431655765, 2788, 0, 1431655765, 2788, 0, 1431655765, 2792, 0, 1431655765, 2792, 0, 1431655765, 2792, 0, 1431655765, 2792, 0, 1431655765, 2792, 0, 1431655765, 2792, 0, 1431655765, 2792, 0, 1431655765, 2792, 0, 1431655765, 2792, 0, 1431655765, 2792, 0, 1431655765, 2792, 0, 1431655765, 2792, 0, 1431655765, 2792, 0, 1431655765, 2792, 0, 1431655765, 2792, 0, 1431655765, 2792, 0, 1431655765, 3584, 5461, 0, 3584, 5461, 0, 3584, 5461, 0, 3584, 5461, 0, 3584, 5461, 0, 3584, 5461, 0, 3584, 5461, 0, 3600, 5461, 0, 3600, 5461, 0, 3600, 5461, 0, 3600, 5461, 0, 3600, 5461, 0, 3600, 5461, 0, 3600, 5461, 0, 3616, 5461, 0, 3616, 5461, 0, 3616, 5461, 0, 3616, 5461, 0, 3616, 5461, 0, 3616, 5461, 0, 3616, 5461, 0, 9280, 17, 0, 9280, 17, 0, 9856, 286330897, 286327057, 9856, 286330897, 286327057, 9856, 286330897, 286327057, 9856, 286330897, 286327057, 9856, 286330897, 286327057, 9856, 286330897, 286327057, 9856, 286330897, 286327057, 9856, 286330897, 286327057, 9856, 286330897, 286327057, 9856, 286330897, 286327057, 9856, 286330897, 286327057, 9856, 286330897, 286327057, 9856, 286330897, 286327057, 9856, 286330897, 286327057, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 
2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10176, 2004317815, 2004297591, 10624, 1048319, 0, 10624, 1048319, 0, 10624, 1048319, 0, 10624, 1048319, 0, 10624, 1048319, 0, 10624, 1048319, 0, 10624, 1048319, 0, 10624, 1048319, 0, 10624, 1048319, 0, 10624, 1048319, 0, 10624, 1048319, 0, 10624, 1048319, 0, 10624, 1048319, 0, 10624, 1048319, 0, 10624, 1048319, 0, 10624, 1048319, 0, 10624, 1048319, 0, 10624, 1048319, 0, 10624, 1048319, 0, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 
1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 11200, 1431655509, 1431635285, 14144, 8, 0, 14160, 8, 0, 14176, 8, 0, 14720, 8, 0, 14736, 8, 0, 14752, 8, 0, 19264, 272696336, 68174084, 19264, 272696336, 68174084, 19264, 272696336, 68174084, 19264, 272696336, 68174084, 19264, 272696336, 68174084, 19264, 272696336, 68174084, 19264, 272696336, 68174084, 19264, 272696336, 68174084, 19264, 272696336, 68174084, 19264, 272696336, 68174084, 27776, 0, 16777216, 29504, 63, 4278190080, 29504, 63, 4278190080, 29504, 63, 4278190080, 29504, 63, 4278190080, 29504, 63, 4278190080, 29504, 63, 4278190080, 29504, 63, 4278190080, 29504, 63, 4278190080, 29504, 63, 4278190080, 29504, 63, 4278190080, 29504, 63, 4278190080, 29504, 63, 4278190080, 29504, 63, 4278190080, 29504, 63, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080, 31872, 16383, 4278190080] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + 
- Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756466675444954413_817_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756466675444954413_817_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bc90b2bd --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756466675444954413_817_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,121 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 39)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((67 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 171 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 2320, 0, 1227132928, 2320, 0, 1227132928, 2320, 0, 
1227132928, 2320, 0, 1227132928, 2320, 0, 1227132928, 2320, 0, 1227132928, 2320, 0, 1227132928, 2336, 0, 1227132928, 2336, 0, 1227132928, 2336, 0, 1227132928, 2336, 0, 1227132928, 2336, 0, 1227132928, 2336, 0, 1227132928, 2336, 0, 1227132928, 2352, 0, 1227132928, 2352, 0, 1227132928, 2352, 0, 1227132928, 2352, 0, 1227132928, 2352, 0, 1227132928, 2352, 0, 1227132928, 2352, 0, 1227132928, 4304, 2340, 0, 4304, 2340, 0, 4304, 2340, 0, 4304, 2340, 0, 4320, 2340, 0, 4320, 2340, 0, 4320, 2340, 0, 4320, 2340, 0, 4336, 2340, 0, 4336, 2340, 0, 4336, 2340, 0, 4336, 2340, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756466856435971622_819_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756466856435971622_819_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..776bd4c4 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756466856435971622_819_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,262 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 9)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if 
((counter0 == 1)) { + break; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((114 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((125 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((134 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (154 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + break; + } + case 2: { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 57)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (i4 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 2)) { + counter5 = (counter5 + 1); + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((222 << 6) | (i4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 40)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((229 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 2)) { + break; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 276 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 
2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 960, 2863311530, 2863311530, 5904, 1090785345, 272696336, 5904, 1090785345, 272696336, 5904, 1090785345, 272696336, 5904, 1090785345, 272696336, 5904, 1090785345, 272696336, 5904, 1090785345, 272696336, 5904, 1090785345, 272696336, 5904, 1090785345, 272696336, 5904, 1090785345, 272696336, 5904, 1090785345, 272696336, 5904, 1090785345, 272696336, 5920, 1090785345, 272696336, 5920, 1090785345, 272696336, 5920, 1090785345, 272696336, 5920, 1090785345, 272696336, 5920, 1090785345, 272696336, 5920, 1090785345, 272696336, 5920, 1090785345, 272696336, 5920, 1090785345, 272696336, 5920, 1090785345, 272696336, 5920, 1090785345, 272696336, 5920, 1090785345, 272696336, 8020, 64, 0, 8024, 64, 0, 8036, 64, 0, 8040, 64, 0, 9296, 512, 16, 9296, 512, 16, 9312, 512, 16, 9312, 512, 16, 9856, 272696336, 68174084, 9856, 272696336, 68174084, 9856, 272696336, 68174084, 9856, 272696336, 68174084, 9856, 272696336, 68174084, 9856, 272696336, 68174084, 9856, 272696336, 68174084, 9856, 272696336, 68174084, 9856, 272696336, 68174084, 9856, 272696336, 68174084, 11408, 2, 0, 11424, 2, 0, 14212, 131104, 1, 14212, 131104, 1, 14212, 131104, 1, 14216, 131104, 1, 14216, 131104, 1, 14216, 131104, 1, 14228, 131104, 1, 14228, 131104, 1, 14228, 131104, 1, 14232, 131104, 1, 14232, 131104, 1, 14232, 131104, 1, 14244, 131104, 1, 14244, 131104, 1, 14244, 131104, 1, 14248, 131104, 1, 14248, 131104, 1, 14248, 131104, 1] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] 
+Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756466922543400141_820_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756466922543400141_820_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..84781379 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756466922543400141_820_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,148 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 19)) { + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint 
i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((58 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((73 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMin(2)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7936, 1431655765, 1431655765, 7680, 174762, 2863267840, 7680, 174762, 2863267840, 7680, 174762, 2863267840, 7680, 174762, 2863267840, 7680, 174762, 2863267840, 7680, 
174762, 2863267840, 7680, 174762, 2863267840, 7680, 174762, 2863267840, 7680, 174762, 2863267840, 7680, 174762, 2863267840, 7680, 174762, 2863267840, 7680, 174762, 2863267840, 7680, 174762, 2863267840, 7680, 174762, 2863267840, 7680, 174762, 2863267840, 7680, 174762, 2863267840, 7680, 174762, 2863267840, 7424, 0, 512] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467031034716315_823_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467031034716315_823_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..704d17f5 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467031034716315_823_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,270 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 63))) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 15)) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 37)) { + if ((WaveGetLaneIndex() >= 55)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 34)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 55)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + if ((WaveGetLaneIndex() == 25)) { + if ((WaveGetLaneIndex() == 36)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((145 << 6) | (counter2 << 4)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 2)) { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((163 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((174 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((183 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((204 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] 
= (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 117 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 2944, 102, 0, 2944, 102, 0, 2944, 102, 0, 2944, 102, 0, 2960, 102, 0, 2960, 102, 0, 2960, 102, 0, 2960, 102, 0, 2976, 102, 0, 2976, 102, 0, 2976, 102, 0, 2976, 102, 0, 3840, 102, 0, 3840, 102, 0, 3840, 102, 0, 3840, 102, 0, 6480, 0, 524288, 6496, 0, 524288, 7360, 0, 2290089984, 7360, 0, 2290089984, 7360, 0, 2290089984, 11152, 33554432, 0, 11156, 33554432, 0, 11168, 33554432, 0, 11172, 33554432, 0, 11184, 33554432, 0, 11188, 33554432, 0, 11728, 33554432, 0, 11732, 33554432, 0, 11744, 33554432, 0, 11748, 33554432, 0, 11760, 33554432, 0, 11764, 33554432, 0, 13888, 85, 0, 13888, 85, 0, 13888, 85, 0, 13888, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467075690423620_825_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467075690423620_825_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..caa5682f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467075690423620_825_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 61)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 3))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 23)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 150 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3840, 0, 3758096384, 3840, 0, 3758096384, 3840, 0, 3758096384, 3456, 2056, 2097184, 3456, 2056, 2097184, 3456, 2056, 2097184, 3456, 2056, 2097184, 3200, 2039, 528482304, 3200, 2039, 528482304, 3200, 2039, 528482304, 3200, 2039, 528482304, 3200, 2039, 528482304, 3200, 2039, 528482304, 3200, 2039, 528482304, 3200, 2039, 528482304, 3200, 2039, 528482304, 3200, 2039, 528482304, 3200, 2039, 528482304, 3200, 2039, 528482304, 3200, 2039, 528482304, 3200, 2039, 528482304, 3200, 2039, 528482304, 3200, 2039, 528482304, 2816, 8384512, 0, 2816, 8384512, 0, 2816, 8384512, 0, 2816, 8384512, 0, 2816, 8384512, 0, 2816, 8384512, 0, 2816, 8384512, 0, 2816, 8384512, 0, 2816, 8384512, 0, 2816, 8384512, 0, 2816, 8384512, 0, 2432, 1426063360, 5592405, 2432, 1426063360, 5592405, 2432, 1426063360, 5592405, 2432, 1426063360, 5592405, 2432, 1426063360, 5592405, 2432, 1426063360, 5592405, 2432, 1426063360, 5592405, 2432, 1426063360, 5592405, 2432, 1426063360, 5592405, 2432, 1426063360, 5592405, 2432, 1426063360, 5592405, 2432, 1426063360, 5592405, 2432, 1426063360, 5592405, 2432, 1426063360, 5592405, 2432, 1426063360, 5592405, 2432, 1426063360, 
5592405] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467302806081350_829_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467302806081350_829_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b553cf2f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467302806081350_829_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,200 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { 
+ switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 61)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 54 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 6592, 4195328, 67125252, 6592, 4195328, 67125252, 6592, 4195328, 67125252, 6592, 4195328, 67125252, 6592, 4195328, 67125252, 6912, 67125252, 1074004032, 6912, 67125252, 1074004032, 6912, 67125252, 1074004032, 6912, 67125252, 1074004032, 6912, 67125252, 1074004032, 6912, 67125252, 1074004032, 7360, 559240, 0, 7360, 559240, 0, 
7360, 559240, 0, 7360, 559240, 0, 7360, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467323613344752_831_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467323613344752_831_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d40eb5d5 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467323613344752_831_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,221 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() >= 33)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 18)) { + if ((WaveGetLaneIndex() >= 54)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 34)) { + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 43)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((88 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i0 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } else { + if (((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 29)) { + if ((WaveGetLaneIndex() >= 36)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((148 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 43))) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 25)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((185 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() >= 34)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((212 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter3 == 2)) { + break; + } + } + } + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 771 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1856, 0, 33587200, 1856, 0, 33587200, 1872, 0, 33587200, 1872, 0, 33587200, 1888, 0, 33587200, 1888, 0, 33587200, 4416, 0, 4278190080, 4416, 0, 4278190080, 4416, 0, 4278190080, 4416, 0, 4278190080, 4416, 0, 4278190080, 4416, 0, 4278190080, 4416, 0, 4278190080, 4416, 0, 4278190080, 4432, 0, 4278190080, 4432, 0, 4278190080, 4432, 0, 
4278190080, 4432, 0, 4278190080, 4432, 0, 4278190080, 4432, 0, 4278190080, 4432, 0, 4278190080, 4432, 0, 4278190080, 4448, 0, 4278190080, 4448, 0, 4278190080, 4448, 0, 4278190080, 4448, 0, 4278190080, 4448, 0, 4278190080, 4448, 0, 4278190080, 4448, 0, 4278190080, 4448, 0, 4278190080, 5632, 0, 4294705152, 5632, 0, 4294705152, 5632, 0, 4294705152, 5632, 0, 4294705152, 5632, 0, 4294705152, 5632, 0, 4294705152, 5632, 0, 4294705152, 5632, 0, 4294705152, 5632, 0, 4294705152, 5632, 0, 4294705152, 5632, 0, 4294705152, 5632, 0, 4294705152, 5632, 0, 4294705152, 5632, 0, 4294705152, 5636, 0, 4294705152, 5636, 0, 4294705152, 5636, 0, 4294705152, 5636, 0, 4294705152, 5636, 0, 4294705152, 5636, 0, 4294705152, 5636, 0, 4294705152, 5636, 0, 4294705152, 5636, 0, 4294705152, 5636, 0, 4294705152, 5636, 0, 4294705152, 5636, 0, 4294705152, 5636, 0, 4294705152, 5636, 0, 4294705152, 5640, 0, 4294705152, 5640, 0, 4294705152, 5640, 0, 4294705152, 5640, 0, 4294705152, 5640, 0, 4294705152, 5640, 0, 4294705152, 5640, 0, 4294705152, 5640, 0, 4294705152, 5640, 0, 4294705152, 5640, 0, 4294705152, 5640, 0, 4294705152, 5640, 0, 4294705152, 5640, 0, 4294705152, 5640, 0, 4294705152, 5648, 0, 4294705152, 5648, 0, 4294705152, 5648, 0, 4294705152, 5648, 0, 4294705152, 5648, 0, 4294705152, 5648, 0, 4294705152, 5648, 0, 4294705152, 5648, 0, 4294705152, 5648, 0, 4294705152, 5648, 0, 4294705152, 5648, 0, 4294705152, 5648, 0, 4294705152, 5648, 0, 4294705152, 5648, 0, 4294705152, 5652, 0, 4294705152, 5652, 0, 4294705152, 5652, 0, 4294705152, 5652, 0, 4294705152, 5652, 0, 4294705152, 5652, 0, 4294705152, 5652, 0, 4294705152, 5652, 0, 4294705152, 5652, 0, 4294705152, 5652, 0, 4294705152, 5652, 0, 4294705152, 5652, 0, 4294705152, 5652, 0, 4294705152, 5652, 0, 4294705152, 5656, 0, 4294705152, 5656, 0, 4294705152, 5656, 0, 4294705152, 5656, 0, 4294705152, 5656, 0, 4294705152, 5656, 0, 4294705152, 5656, 0, 4294705152, 5656, 0, 4294705152, 5656, 0, 4294705152, 5656, 0, 4294705152, 5656, 0, 4294705152, 5656, 0, 
4294705152, 5656, 0, 4294705152, 5656, 0, 4294705152, 5664, 0, 4294705152, 5664, 0, 4294705152, 5664, 0, 4294705152, 5664, 0, 4294705152, 5664, 0, 4294705152, 5664, 0, 4294705152, 5664, 0, 4294705152, 5664, 0, 4294705152, 5664, 0, 4294705152, 5664, 0, 4294705152, 5664, 0, 4294705152, 5664, 0, 4294705152, 5664, 0, 4294705152, 5664, 0, 4294705152, 5668, 0, 4294705152, 5668, 0, 4294705152, 5668, 0, 4294705152, 5668, 0, 4294705152, 5668, 0, 4294705152, 5668, 0, 4294705152, 5668, 0, 4294705152, 5668, 0, 4294705152, 5668, 0, 4294705152, 5668, 0, 4294705152, 5668, 0, 4294705152, 5668, 0, 4294705152, 5668, 0, 4294705152, 5668, 0, 4294705152, 5672, 0, 4294705152, 5672, 0, 4294705152, 5672, 0, 4294705152, 5672, 0, 4294705152, 5672, 0, 4294705152, 5672, 0, 4294705152, 5672, 0, 4294705152, 5672, 0, 4294705152, 5672, 0, 4294705152, 5672, 0, 4294705152, 5672, 0, 4294705152, 5672, 0, 4294705152, 5672, 0, 4294705152, 5672, 0, 4294705152, 6528, 0, 4227858432, 6528, 0, 4227858432, 6528, 0, 4227858432, 6528, 0, 4227858432, 6528, 0, 4227858432, 6528, 0, 4227858432, 6544, 0, 4227858432, 6544, 0, 4227858432, 6544, 0, 4227858432, 6544, 0, 4227858432, 6544, 0, 4227858432, 6544, 0, 4227858432, 6560, 0, 4227858432, 6560, 0, 4227858432, 6560, 0, 4227858432, 6560, 0, 4227858432, 6560, 0, 4227858432, 6560, 0, 4227858432, 7744, 1073807360, 0, 7744, 1073807360, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11840, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 
11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11856, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 11872, 33554431, 0, 12544, 1073741824, 1, 12544, 1073741824, 1, 12560, 1073741824, 1, 12560, 1073741824, 1, 12576, 1073741824, 1, 12576, 1073741824, 1] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467370142930839_832_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467370142930839_832_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..53c93eda --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467370142930839_832_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 
102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513, 1472, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize64BitTracking/tests/program_1756467393098145455_834_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467393098145455_834_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..d5217b0f
--- /dev/null
+++ b/test/WaveSize64BitTracking/tests/program_1756467393098145455_834_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,417 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // output: (combinedId, ballot.x, ballot.y) triples, one per participating lane
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // [0] is the shared write cursor into _participant_bit
+
+[numthreads(64, 1, 1)] // the expected patterns below presume all 64 threads form a single wave
+void main(uint3 tid : SV_DispatchThreadID) {
+  uint result = 0;
+  switch ((WaveGetLaneIndex() % 2)) {
+    case 0: {
+      if ((WaveGetLaneIndex() < 8)) {
+        result = (result + WaveActiveSum(1));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp); // reserve three slots; temp receives the start index
+        _participant_bit[temp] = (9 << 6); // combinedId: wave-op id stored in bits 6 and up
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x; // active-lane mask, lanes 0-31
+        _participant_bit[(temp + 2)] = ballot.y; // active-lane mask, lanes 32-63
+      }
+      break;
+    }
+    case 1: {
+      for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) {
+        if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 51))) {
+          result = (result + WaveActiveSum(3));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((28 << 6) | (i0 << 4)); // loop iteration is folded in at bit 4
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 63))) {
+          if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 42))) {
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((46 << 6) | (i0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 49))) {
+            result = (result + WaveActiveSum(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((57 << 6) | (i0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        } else {
+          if (((WaveGetLaneIndex() & 1) == 0)) {
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((66 << 6) | (i0 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 63))) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((77 << 6) | (i0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if ((i0 == 2)) {
+          break;
+        }
+      }
+      break;
+    }
+  }
+  for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) {
+    if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 56))) {
+      result = (result + WaveActiveMin(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = ((103 << 6) | (i1 << 4));
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    if ((WaveGetLaneIndex() < 28)) {
+      if ((WaveGetLaneIndex() >= 39)) {
+        result = (result + WaveActiveSum(8));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((113 << 6) | (i1 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if ((WaveGetLaneIndex() >= 50)) {
+        result = (result + WaveActiveSum(result));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((120 << 6) | (i1 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+    }
+  }
+  switch ((WaveGetLaneIndex() % 4)) {
+    case 0: {
+      if ((WaveGetLaneIndex() == 14)) {
+        if ((WaveGetLaneIndex() == 8)) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (133 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() & 1) == 0)) {
+          if (((WaveGetLaneIndex() & 1) == 1)) {
+            result = (result + WaveActiveSum(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (147 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          switch ((WaveGetLaneIndex() % 4)) {
+            case 0: {
+              if ((WaveGetLaneIndex() < 8)) {
+                result = (result + WaveActiveSum(1));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (157 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+            case 1: {
+              if (((WaveGetLaneIndex() % 2) == 0)) {
+                result = (result + WaveActiveSum(2));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (166 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+            case 2: {
+              if (true) {
+                result = (result + WaveActiveSum(3));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (171 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+            case 3: {
+              if ((WaveGetLaneIndex() < 20)) {
+                result = (result + WaveActiveSum(4));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (178 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+            default: {
+              result = (result + WaveActiveSum(99));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (182 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+              break;
+            }
+          }
+          if (((WaveGetLaneIndex() & 1) == 1)) {
+            result = (result + WaveActiveMax(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (191 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((WaveGetLaneIndex() == 5)) {
+          result = (result + WaveActiveMin(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (198 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() % 2) == 0)) {
+        result = (result + WaveActiveSum(2));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (207 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+    case 2: {
+      if ((WaveGetLaneIndex() < 6)) {
+        if ((WaveGetLaneIndex() >= 50)) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (217 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 43))) {
+          switch ((WaveGetLaneIndex() % 3)) {
+            case 0: {
+              if ((WaveGetLaneIndex() < 8)) {
+                result = (result + WaveActiveSum(1));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (234 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+            case 1: {
+              if (((WaveGetLaneIndex() % 2) == 0)) {
+                result = (result + WaveActiveSum(2));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (243 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+            case 2: {
+              if (true) {
+                result = (result + WaveActiveSum(3));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (248 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+            default: {
+              result = (result + WaveActiveSum(99));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (252 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+              break;
+            }
+          }
+          if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 62))) {
+            result = (result + WaveActiveSum((WaveGetLaneIndex() + 1)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (265 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((WaveGetLaneIndex() < 8)) {
+          result = (result + WaveActiveMin(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (272 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      } else {
+        if ((WaveGetLaneIndex() >= 46)) {
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (279 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        if (((WaveGetLaneIndex() & 1) == 1)) {
+          switch ((WaveGetLaneIndex() % 2)) {
+            case 0: {
+              if ((WaveGetLaneIndex() < 8)) {
+                result = (result + WaveActiveSum(1));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (294 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+            case 1: {
+              if (((WaveGetLaneIndex() % 2) == 0)) {
+                result = (result + WaveActiveSum(2));
+                uint temp = 0;
+                InterlockedAdd(_wave_op_index[0], 3, temp);
+                _participant_bit[temp] = (303 << 6);
+                uint4 ballot = WaveActiveBallot(1);
+                _participant_bit[(temp + 1)] = ballot.x;
+                _participant_bit[(temp + 2)] = ballot.y;
+              }
+              break;
+            }
+            default: {
+              result = (result + WaveActiveSum(99));
+              uint temp = 0;
+              InterlockedAdd(_wave_op_index[0], 3, temp);
+              _participant_bit[temp] = (307 << 6);
+              uint4 ballot = WaveActiveBallot(1);
+              _participant_bit[(temp + 1)] = ballot.x;
+              _participant_bit[(temp + 2)] = ballot.y;
+              break;
+            }
+          }
+        }
+      }
+      break;
+    }
+    case 3: {
+      if ((WaveGetLaneIndex() < 20)) {
+        result = (result + WaveActiveSum(4));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (314 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 64 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 351
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 43690, 2863136768, 1792, 43690, 2863136768, 1792, 43690, 2863136768, 1792, 43690, 2863136768, 1792, 43690, 2863136768, 1792, 43690, 2863136768, 1792, 43690, 2863136768, 1792, 43690, 2863136768, 1792, 43690, 2863136768, 1792, 43690, 2863136768, 1792, 43690, 2863136768, 1792, 43690, 2863136768, 1792, 43690, 2863136768, 1792, 43690, 2863136768, 1792, 43690, 2863136768, 1808, 43690, 2863136768, 1808, 43690, 2863136768, 1808, 43690, 2863136768, 1808, 43690, 2863136768, 1808, 43690, 2863136768, 1808, 43690, 2863136768, 1808, 43690, 2863136768, 1808, 43690, 2863136768, 1808, 43690, 2863136768, 1808, 43690, 2863136768, 1808, 43690, 2863136768, 1808, 43690, 2863136768, 1808, 43690, 2863136768, 1808, 43690, 2863136768, 1808, 43690, 2863136768, 1824, 43690, 2863136768, 1824, 43690, 2863136768, 1824, 43690, 2863136768, 1824, 43690, 2863136768, 1824, 43690, 2863136768, 1824, 43690, 2863136768, 1824, 43690, 2863136768, 1824, 43690, 2863136768, 1824, 43690, 2863136768, 1824, 43690, 2863136768, 1824, 43690, 2863136768, 1824, 43690, 2863136768, 1824, 43690, 2863136768, 1824, 43690, 2863136768, 1824, 43690, 2863136768, 2944, 2, 2147483648, 2944, 2, 2147483648, 2960, 2, 2147483648, 2960, 2, 2147483648, 2976, 2, 2147483648, 2976, 2, 2147483648, 3648, 2730, 2147483648, 3648, 2730, 2147483648, 3648, 2730, 2147483648, 3648, 2730, 2147483648, 3648, 2730, 2147483648, 3648, 2730, 2147483648, 3648, 2730, 2147483648, 3664, 2730, 2147483648, 3664, 2730, 2147483648, 3664, 2730, 2147483648, 3664, 2730, 2147483648, 3664, 2730, 2147483648, 3664, 2730, 2147483648, 3664, 2730, 2147483648, 3680, 2730, 2147483648, 3680, 2730, 2147483648, 3680, 2730, 2147483648, 3680, 2730, 2147483648, 3680, 2730, 2147483648, 3680, 2730, 2147483648, 3680, 2730, 2147483648, 4928, 2730, 2147483648, 4928, 2730, 2147483648, 4928, 2730, 2147483648, 4928, 2730, 2147483648, 4928, 2730, 2147483648, 4928, 2730, 2147483648, 4928, 2730, 2147483648, 4944, 2730, 2147483648, 4944, 2730, 2147483648, 4944, 2730, 2147483648, 4944, 2730, 2147483648, 4944, 2730, 2147483648, 4944, 2730, 2147483648, 4944, 2730, 2147483648, 4960, 2730, 2147483648, 4960, 2730, 2147483648, 4960, 2730, 2147483648, 4960, 2730, 2147483648, 4960, 2730, 2147483648, 4960, 2730, 2147483648, 4960, 2730, 2147483648, 6592, 524288, 16777344, 6592, 524288, 16777344, 6592, 524288, 16777344, 6608, 524288, 16777344, 6608, 524288, 16777344, 6608, 524288, 16777344, 6624, 524288, 16777344, 6624, 524288, 16777344, 6624, 524288, 16777344, 17408, 4, 0, 17856, 0, 1145323520, 17856, 0, 1145323520, 17856, 0, 1145323520, 17856, 0, 1145323520, 17856, 0, 1145323520, 20096, 559240, 0, 20096, 559240, 0, 20096, 559240, 0, 20096, 559240, 0, 20096, 559240, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize64BitTracking/tests/program_1756467433995951425_835_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467433995951425_835_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..35613ae2
--- /dev/null
+++ b/test/WaveSize64BitTracking/tests/program_1756467433995951425_835_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,147 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // output: (combinedId, ballot.x, ballot.y) triples, one per participating lane
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // [0] is the shared write cursor into _participant_bit
+
+[numthreads(64, 1, 1)] // the expected patterns below presume all 64 threads form a single wave
+void main(uint3 tid : SV_DispatchThreadID) {
+  uint result = 0;
+  if ((WaveGetLaneIndex() == 16)) {
+    uint counter0 = 0;
+    while ((counter0 < 2)) {
+      counter0 = (counter0 + 1);
+      if ((WaveGetLaneIndex() < 28)) {
+        if ((WaveGetLaneIndex() < 24)) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp); // reserve three slots; temp receives the start index
+          _participant_bit[temp] = ((19 << 6) | (counter0 << 4)); // combinedId: wave-op id at bit 6, loop counter at bit 4
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x; // active-lane mask, lanes 0-31
+          _participant_bit[(temp + 2)] = ballot.y; // active-lane mask, lanes 32-63
+        }
+        for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) {
+          if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 51))) {
+            result = (result + WaveActiveMin(WaveGetLaneIndex()));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (((46 << 6) | (counter0 << 4)) | (i1 << 2)); // nested loop counter is folded in at bit 2
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        if ((WaveGetLaneIndex() >= 40)) {
+          result = (result + WaveActiveMax((WaveGetLaneIndex() + 2)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = ((55 << 6) | (counter0 << 4));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+    }
+  } else {
+    switch ((WaveGetLaneIndex() % 4)) {
+      case 0: {
+        for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) {
+          if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 11))) {
+            result = (result + WaveActiveMax(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = ((89 << 6) | (i2 << 4));
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        break;
+      }
+      case 1: {
+        if (((WaveGetLaneIndex() % 2) == 0)) {
+          result = (result + WaveActiveSum(2));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (98 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 2: {
+        if (true) {
+          result = (result + WaveActiveSum(3));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (103 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 3: {
+        if ((WaveGetLaneIndex() < 20)) {
+          result = (result + WaveActiveSum(4));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (110 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 64 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 81
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1232, 65536, 0, 1248, 65536, 0, 5696, 1048832, 0, 5696, 1048832, 0, 5712, 1048832, 0, 5712, 1048832, 0, 6592, 1145324612, 1145324612, 6592, 1145324612, 1145324612, 6592, 1145324612, 1145324612, 6592, 1145324612, 1145324612, 6592, 1145324612, 1145324612, 6592, 1145324612, 1145324612, 6592, 1145324612, 1145324612, 6592, 1145324612, 1145324612, 6592, 1145324612, 1145324612, 6592, 1145324612, 1145324612, 6592, 1145324612, 1145324612, 6592, 1145324612, 1145324612, 6592, 1145324612, 1145324612, 6592, 1145324612, 1145324612, 6592, 1145324612, 1145324612, 6592, 1145324612, 1145324612, 7040, 559240, 0, 7040, 559240, 0, 7040, 559240, 0, 7040, 559240, 0, 7040, 559240, 0]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467470091933982_837_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467470091933982_837_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ca1e27fd --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467470091933982_837_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,458 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 31)) { + if ((WaveGetLaneIndex() < 25)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 3))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 7)) { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 62))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 49)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((149 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 45)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (180 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((213 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((222 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((249 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((279 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((298 << 6) | (i3 << 4)) | (counter4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 1)) { + break; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (305 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (315 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 57))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (335 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 29)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (344 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i5 = 0; (i5 < 2); i5 = 
(i5 + 1)) { + if ((WaveGetLaneIndex() < 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((359 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 63))) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (377 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (387 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (396 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (401 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (405 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (416 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (429 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 59)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (439 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 35)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (446 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (457 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (461 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 819 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 576, 33554431, 0, 1216, 17, 0, 1216, 17, 0, 7040, 68, 0, 7040, 68, 0, 10816, 559240, 0, 10816, 559240, 0, 10816, 559240, 0, 10816, 559240, 0, 10816, 559240, 0, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 1431655765, 1431655765, 12752, 
1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 12768, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 1431655765, 13648, 1431655765, 
1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 13664, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 14224, 1431655765, 1431655765, 
14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 14240, 1431655765, 1431655765, 15936, 139264, 0, 15936, 139264, 0, 15952, 139264, 0, 15952, 139264, 0, 17860, 131072, 524288, 17860, 131072, 524288, 17876, 131072, 524288, 17876, 131072, 524288, 19076, 2, 134218240, 19076, 2, 134218240, 19076, 2, 134218240, 19092, 2, 134218240, 19092, 2, 134218240, 19092, 2, 134218240, 20160, 73, 0, 20160, 73, 0, 20160, 73, 0, 21440, 2, 603979776, 21440, 2, 603979776, 21440, 2, 603979776, 22016, 306783376, 0, 22016, 306783376, 0, 22016, 306783376, 0, 22016, 306783376, 0, 22016, 306783376, 0, 22016, 306783376, 0, 22016, 306783376, 0, 22016, 306783376, 0, 22016, 306783376, 0, 22976, 16, 0, 22992, 16, 0, 24128, 36, 0, 24128, 36, 0, 25664, 292, 0, 25664, 292, 0, 25664, 292, 0, 26624, 292, 0, 26624, 292, 0, 26624, 292, 0, 27456, 149504, 1224736768, 27456, 149504, 1224736768, 27456, 149504, 1224736768, 27456, 149504, 1224736768, 27456, 149504, 1224736768, 27456, 149504, 1224736768, 29248, 2048, 1073741824, 29248, 2048, 1073741824] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] 
+Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467584664344806_838_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467584664344806_838_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1c999095 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467584664344806_838_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,250 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 56)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((35 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 25))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = 
(result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } else { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((124 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 14))) { + if ((((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 34)) { + result = 
(result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 23)) { + if ((WaveGetLaneIndex() >= 40)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2688, 64, 0, 13824, 2, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: 
RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467585274689782_839_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467585274689782_839_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8216cc50 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467585274689782_839_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,152 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((13 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 15)) { + if ((WaveGetLaneIndex() == 33)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((43 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 53)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 49)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: 
[1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 6 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [848, 32, 0, 864, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467589151278048_842_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467589151278048_842_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..74b34062 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467589151278048_842_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,231 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 49))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 47)) { + if ((WaveGetLaneIndex() == 34)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 48))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 
46))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (147 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((185 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((200 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 420 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1744, 9, 2453667840, 1744, 9, 2453667840, 1744, 9, 2453667840, 1744, 9, 2453667840, 1744, 9, 2453667840, 1744, 9, 2453667840, 1760, 9, 2453667840, 1760, 9, 2453667840, 1760, 9, 2453667840, 1760, 9, 2453667840, 1760, 9, 2453667840, 1760, 9, 2453667840, 2576, 9, 2449473536, 2576, 9, 2449473536, 2576, 9, 2449473536, 2576, 9, 2449473536, 2576, 9, 2449473536, 2592, 9, 2449473536, 2592, 
9, 2449473536, 2592, 9, 2449473536, 2592, 9, 2449473536, 2592, 9, 2449473536, 3728, 1090785345, 272696336, 3728, 1090785345, 272696336, 3728, 1090785345, 272696336, 3728, 1090785345, 272696336, 3728, 1090785345, 272696336, 3728, 1090785345, 272696336, 3728, 1090785345, 272696336, 3728, 1090785345, 272696336, 3728, 1090785345, 272696336, 3728, 1090785345, 272696336, 3728, 1090785345, 272696336, 3744, 1090785345, 272696336, 3744, 1090785345, 272696336, 3744, 1090785345, 272696336, 3744, 1090785345, 272696336, 3744, 1090785345, 272696336, 3744, 1090785345, 272696336, 3744, 1090785345, 272696336, 3744, 1090785345, 272696336, 3744, 1090785345, 272696336, 3744, 1090785345, 272696336, 3744, 1090785345, 272696336, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4288, 1363481681, 340870420, 4928, 73, 0, 4928, 73, 0, 4928, 73, 0, 6848, 2, 613416960, 6848, 2, 613416960, 6848, 2, 613416960, 6848, 2, 613416960, 6848, 2, 613416960, 7552, 2, 613564416, 7552, 2, 613564416, 7552, 2, 613564416, 7552, 2, 613564416, 7552, 2, 613564416, 7552, 2, 613564416, 7552, 2, 613564416, 7872, 613566756, 1227133513, 7872, 613566756, 1227133513, 7872, 613566756, 1227133513, 7872, 613566756, 1227133513, 7872, 613566756, 1227133513, 7872, 613566756, 1227133513, 7872, 613566756, 1227133513, 7872, 613566756, 1227133513, 7872, 613566756, 1227133513, 7872, 613566756, 1227133513, 7872, 613566756, 1227133513, 7872, 613566756, 1227133513, 7872, 613566756, 1227133513, 7872, 613566756, 
1227133513, 7872, 613566756, 1227133513, 7872, 613566756, 1227133513, 7872, 613566756, 1227133513, 7872, 613566756, 1227133513, 7872, 613566756, 1227133513, 7872, 613566756, 1227133513, 7872, 613566756, 1227133513, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 9408, 31, 4294901760, 11844, 1024, 2097152, 11844, 1024, 2097152, 11848, 1024, 2097152, 11848, 1024, 2097152, 11852, 1024, 2097152, 11852, 1024, 2097152, 11860, 1024, 2097152, 11860, 1024, 2097152, 11864, 1024, 2097152, 11864, 1024, 2097152, 11868, 1024, 2097152, 11868, 1024, 2097152, 12804, 32, 0, 12808, 32, 0, 12812, 32, 0, 12820, 32, 0, 12824, 32, 0, 12828, 32, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467630067052412_843_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467630067052412_843_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b3182e5c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467630067052412_843_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467630277290416_844_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467630277290416_844_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1469e0ef --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467630277290416_844_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,139 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 59))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] 
= ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((65 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 45)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((81 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((90 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 55)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3280, 2147483648, 134217728, 3280, 2147483648, 134217728, 3296, 2147483648, 134217728, 3296, 2147483648, 134217728, 3312, 2147483648, 134217728, 3312, 2147483648, 134217728, 4176, 4, 65536, 4176, 4, 65536, 4192, 4, 65536, 4192, 4, 65536, 4208, 4, 65536, 4208, 4, 65536, 5776, 2147483648, 134217728, 5776, 2147483648, 134217728, 5792, 2147483648, 134217728, 5792, 2147483648, 134217728, 5808, 2147483648, 134217728, 5808, 2147483648, 134217728, 7168, 0, 4152360960, 7168, 0, 4152360960, 7168, 0, 4152360960, 7168, 0, 4152360960, 7168, 0, 4152360960, 7168, 0, 4152360960, 7168, 0, 4152360960, 7168, 0, 4152360960] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467630737574931_845_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467630737574931_845_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b564a060 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467630737574931_845_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,289 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 50)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((33 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + 
WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 39)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((67 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((76 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + 
} + if ((WaveGetLaneIndex() >= 51)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (160 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (170 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (184 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 378 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 65, 0, 768, 65, 0, 2128, 1024, 0, 2144, 1024, 0, 2160, 1024, 0, 2432, 68174084, 1090785345, 2432, 68174084, 1090785345, 2432, 68174084, 1090785345, 2432, 68174084, 1090785345, 2432, 68174084, 1090785345, 2432, 68174084, 1090785345, 2432, 68174084, 1090785345, 2432, 68174084, 1090785345, 2432, 68174084, 1090785345, 2432, 68174084, 
1090785345, 2432, 68174084, 1090785345, 3344, 699050, 0, 3344, 699050, 0, 3344, 699050, 0, 3344, 699050, 0, 3344, 699050, 0, 3344, 699050, 0, 3344, 699050, 0, 3344, 699050, 0, 3344, 699050, 0, 3344, 699050, 0, 3360, 699050, 0, 3360, 699050, 0, 3360, 699050, 0, 3360, 699050, 0, 3360, 699050, 0, 3360, 699050, 0, 3360, 699050, 0, 3360, 699050, 0, 3360, 699050, 0, 3360, 699050, 0, 4304, 0, 2863311488, 4304, 0, 2863311488, 4304, 0, 2863311488, 4304, 0, 2863311488, 4304, 0, 2863311488, 4304, 0, 2863311488, 4304, 0, 2863311488, 4304, 0, 2863311488, 4304, 0, 2863311488, 4304, 0, 2863311488, 4304, 0, 2863311488, 4304, 0, 2863311488, 4304, 0, 2863311488, 4308, 0, 2863311488, 4308, 0, 2863311488, 4308, 0, 2863311488, 4308, 0, 2863311488, 4308, 0, 2863311488, 4308, 0, 2863311488, 4308, 0, 2863311488, 4308, 0, 2863311488, 4308, 0, 2863311488, 4308, 0, 2863311488, 4308, 0, 2863311488, 4308, 0, 2863311488, 4308, 0, 2863311488, 4320, 0, 2863311488, 4320, 0, 2863311488, 4320, 0, 2863311488, 4320, 0, 2863311488, 4320, 0, 2863311488, 4320, 0, 2863311488, 4320, 0, 2863311488, 4320, 0, 2863311488, 4320, 0, 2863311488, 4320, 0, 2863311488, 4320, 0, 2863311488, 4320, 0, 2863311488, 4320, 0, 2863311488, 4324, 0, 2863311488, 4324, 0, 2863311488, 4324, 0, 2863311488, 4324, 0, 2863311488, 4324, 0, 2863311488, 4324, 0, 2863311488, 4324, 0, 2863311488, 4324, 0, 2863311488, 4324, 0, 2863311488, 4324, 0, 2863311488, 4324, 0, 2863311488, 4324, 0, 2863311488, 4324, 0, 2863311488, 4880, 2730, 0, 4880, 2730, 0, 4880, 2730, 0, 4880, 2730, 0, 4880, 2730, 0, 4880, 2730, 0, 4896, 2730, 0, 4896, 2730, 0, 4896, 2730, 0, 4896, 2730, 0, 4896, 2730, 0, 4896, 2730, 0, 5504, 73, 0, 5504, 73, 0, 5504, 73, 0, 6144, 2, 0, 6784, 16, 0, 11776, 613566756, 1227133513, 11776, 613566756, 1227133513, 11776, 613566756, 1227133513, 11776, 613566756, 1227133513, 11776, 613566756, 1227133513, 11776, 613566756, 1227133513, 11776, 613566756, 1227133513, 11776, 613566756, 1227133513, 11776, 613566756, 1227133513, 11776, 
613566756, 1227133513, 11776, 613566756, 1227133513, 11776, 613566756, 1227133513, 11776, 613566756, 1227133513, 11776, 613566756, 1227133513, 11776, 613566756, 1227133513, 11776, 613566756, 1227133513, 11776, 613566756, 1227133513, 11776, 613566756, 1227133513, 11776, 613566756, 1227133513, 11776, 613566756, 1227133513, 11776, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467682991683943_848_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467682991683943_848_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0df7d80a --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467682991683943_848_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,225 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((24 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((89 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (153 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (163 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 48)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 324 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1552, 262144, 2, 1552, 262144, 2, 1568, 262144, 2, 1568, 262144, 2, 1584, 262144, 2, 1584, 262144, 2, 3280, 16777224, 0, 3280, 16777224, 0, 3284, 16777224, 0, 3284, 16777224, 0, 3288, 16777224, 0, 3288, 16777224, 0, 3296, 16777224, 0, 3296, 16777224, 0, 3300, 16777224, 0, 3300, 16777224, 0, 3304, 16777224, 0, 3304, 16777224, 0, 3312, 16777224, 0, 3312, 16777224, 0, 3316, 16777224, 0, 3316, 16777224, 0, 3320, 16777224, 0, 3320, 16777224, 0, 4496, 0, 66560, 4496, 0, 66560, 4500, 0, 66560, 4500, 0, 66560, 4504, 0, 66560, 4504, 0, 66560, 4512, 0, 66560, 4512, 0, 66560, 4516, 0, 66560, 4516, 0, 66560, 4520, 0, 66560, 4520, 0, 66560, 4528, 0, 66560, 4528, 0, 66560, 4532, 0, 66560, 4532, 0, 66560, 4536, 0, 66560, 4536, 0, 
66560, 5712, 0, 8192, 5728, 0, 8192, 5744, 0, 8192, 6272, 272696336, 68174084, 6272, 272696336, 68174084, 6272, 272696336, 68174084, 6272, 272696336, 68174084, 6272, 272696336, 68174084, 6272, 272696336, 68174084, 6272, 272696336, 68174084, 6272, 272696336, 68174084, 6272, 272696336, 68174084, 6272, 272696336, 68174084, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 6592, 613566756, 1227133513, 8384, 85, 0, 8384, 85, 0, 8384, 85, 0, 8384, 85, 0, 10432, 85, 0, 10432, 85, 0, 10432, 85, 0, 10432, 85, 0, 11392, 0, 2863267840, 11392, 0, 2863267840, 11392, 0, 2863267840, 11392, 0, 2863267840, 11392, 0, 2863267840, 11392, 0, 2863267840, 11392, 0, 2863267840, 11392, 0, 2863267840, 11408, 0, 2863267840, 11408, 0, 2863267840, 11408, 0, 2863267840, 11408, 0, 2863267840, 11408, 0, 2863267840, 11408, 0, 2863267840, 11408, 0, 2863267840, 11408, 0, 2863267840, 11424, 0, 2863267840, 11424, 0, 2863267840, 11424, 0, 2863267840, 11424, 0, 2863267840, 11424, 0, 2863267840, 11424, 0, 2863267840, 11424, 0, 2863267840, 11424, 0, 2863267840] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + 
DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467729849542521_850_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467729849542521_850_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..84be44c9 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467729849542521_850_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,215 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 24)) { + if ((WaveGetLaneIndex() < 27)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 19)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + break; + } + case 2: { + if ((WaveGetLaneIndex() == 7)) { + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (44 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 26)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; 
(i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((107 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 54))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((125 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 2396745, 0, 1152, 2396745, 0, 1152, 2396745, 0, 1152, 2396745, 0, 1152, 2396745, 0, 1152, 2396745, 0, 1152, 2396745, 0, 1152, 2396745, 0, 1600, 299593, 0, 1600, 299593, 0, 1600, 299593, 0, 1600, 299593, 0, 1600, 299593, 0, 1600, 299593, 0, 1600, 
299593, 0, 6864, 2, 0, 6868, 2, 0, 6880, 2, 0, 6884, 2, 0, 6896, 2, 0, 6900, 2, 0, 8016, 2, 612368384, 8016, 2, 612368384, 8016, 2, 612368384, 8016, 2, 612368384, 8020, 2, 612368384, 8020, 2, 612368384, 8020, 2, 612368384, 8020, 2, 612368384, 8032, 2, 612368384, 8032, 2, 612368384, 8032, 2, 612368384, 8032, 2, 612368384, 8036, 2, 612368384, 8036, 2, 612368384, 8036, 2, 612368384, 8036, 2, 612368384, 8048, 2, 612368384, 8048, 2, 612368384, 8048, 2, 612368384, 8048, 2, 612368384, 8052, 2, 612368384, 8052, 2, 612368384, 8052, 2, 612368384, 8052, 2, 612368384, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513, 8320, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467735407845398_851_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467735407845398_851_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0e09d3b7 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467735407845398_851_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,178 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 50)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((13 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 53)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((27 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 48))) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((45 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((64 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((69 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((76 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((87 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = 
(counter2 + 1); + if ((WaveGetLaneIndex() == 40)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((((101 << 6) | (counter0 << 4)) | (counter1 << 2)) | counter2); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 53)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((108 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 414 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [848, 0, 262144, 864, 0, 262144, 1748, 0, 2097152, 1764, 0, 2097152, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2900, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 
2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 2916, 2047, 4294901760, 3540, 17, 0, 3540, 17, 0, 3556, 17, 0, 3556, 17, 0, 4116, 4369, 286326784, 4116, 4369, 286326784, 4116, 4369, 286326784, 4116, 4369, 286326784, 4116, 4369, 286326784, 4116, 4369, 286326784, 4116, 4369, 286326784, 4116, 4369, 286326784, 4132, 4369, 286326784, 4132, 4369, 286326784, 4132, 4369, 286326784, 4132, 4369, 286326784, 4132, 4369, 286326784, 4132, 4369, 286326784, 4132, 4369, 286326784, 4132, 4369, 286326784, 4436, 1092, 1145307136, 4436, 1092, 1145307136, 4436, 1092, 1145307136, 4436, 1092, 1145307136, 4436, 1092, 1145307136, 4436, 1092, 1145307136, 4436, 1092, 1145307136, 4452, 1092, 1145307136, 4452, 1092, 1145307136, 4452, 1092, 1145307136, 4452, 1092, 1145307136, 4452, 1092, 1145307136, 4452, 1092, 1145307136, 4452, 1092, 1145307136, 4884, 2184, 0, 4884, 2184, 0, 4884, 2184, 0, 4900, 2184, 0, 4900, 2184, 0, 4900, 2184, 0, 5588, 2863300608, 43690, 5588, 2863300608, 43690, 5588, 2863300608, 43690, 5588, 2863300608, 43690, 5588, 2863300608, 43690, 5588, 2863300608, 43690, 5588, 2863300608, 43690, 5588, 2863300608, 43690, 5588, 2863300608, 43690, 5588, 2863300608, 43690, 5588, 2863300608, 43690, 5588, 2863300608, 43690, 5588, 2863300608, 43690, 5588, 2863300608, 43690, 5588, 2863300608, 43690, 5588, 2863300608, 43690, 5588, 2863300608, 43690, 5604, 2863300608, 43690, 5604, 2863300608, 43690, 5604, 2863300608, 43690, 5604, 2863300608, 43690, 5604, 2863300608, 43690, 5604, 2863300608, 43690, 5604, 2863300608, 43690, 5604, 2863300608, 43690, 5604, 2863300608, 43690, 5604, 2863300608, 43690, 5604, 2863300608, 
43690, 5604, 2863300608, 43690, 5604, 2863300608, 43690, 5604, 2863300608, 43690, 5604, 2863300608, 43690, 5604, 2863300608, 43690, 5604, 2863300608, 43690, 6485, 0, 256, 6486, 0, 256, 6501, 0, 256, 6502, 0, 256, 6932, 0, 2097152, 6948, 0, 2097152] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467781233263712_852_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467781233263712_852_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c342eb88 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467781233263712_852_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,291 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 50))) { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 59))) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 44))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 8))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 47))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((169 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((207 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((221 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((236 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 32)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((243 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 183 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2048, 0, 268435456, 4992, 0, 268435456, 6336, 1, 0, 7232, 1145324612, 1145324612, 7232, 1145324612, 1145324612, 7232, 1145324612, 1145324612, 7232, 1145324612, 1145324612, 7232, 1145324612, 1145324612, 7232, 1145324612, 1145324612, 7232, 1145324612, 1145324612, 7232, 1145324612, 1145324612, 7232, 1145324612, 1145324612, 7232, 1145324612, 1145324612, 7232, 1145324612, 1145324612, 7232, 1145324612, 1145324612, 7232, 1145324612, 1145324612, 7232, 1145324612, 1145324612, 7232, 1145324612, 1145324612, 7232, 1145324612, 1145324612, 13248, 8, 0, 13264, 8, 0, 13280, 8, 0, 14144, 8390656, 134250504, 14144, 8390656, 134250504, 14144, 8390656, 134250504, 14144, 8390656, 134250504, 14144, 8390656, 
134250504, 14160, 8390656, 134250504, 14160, 8390656, 134250504, 14160, 8390656, 134250504, 14160, 8390656, 134250504, 14160, 8390656, 134250504, 14176, 8390656, 134250504, 14176, 8390656, 134250504, 14176, 8390656, 134250504, 14176, 8390656, 134250504, 14176, 8390656, 134250504, 15552, 2290649224, 0, 15552, 2290649224, 0, 15552, 2290649224, 0, 15552, 2290649224, 0, 15552, 2290649224, 0, 15552, 2290649224, 0, 15552, 2290649224, 0, 15552, 2290649224, 0, 15568, 2290649224, 0, 15568, 2290649224, 0, 15568, 2290649224, 0, 15568, 2290649224, 0, 15568, 2290649224, 0, 15568, 2290649224, 0, 15568, 2290649224, 0, 15568, 2290649224, 0, 15584, 2290649224, 0, 15584, 2290649224, 0, 15584, 2290649224, 0, 15584, 2290649224, 0, 15584, 2290649224, 0, 15584, 2290649224, 0, 15584, 2290649224, 0, 15584, 2290649224, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467785712982990_853_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467785712982990_853_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cd8a784d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467785712982990_853_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,319 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((29 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 19)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | 
(counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 46))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (95 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + 
result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter2 = 0; + 
while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((172 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((183 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (197 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 336 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 1, 0, 3536, 65536, 0, 3552, 65536, 0, 3568, 65536, 0, 5120, 0, 16777216, 6976, 1145324612, 1145324612, 6976, 1145324612, 1145324612, 6976, 1145324612, 1145324612, 6976, 1145324612, 1145324612, 6976, 1145324612, 1145324612, 6976, 1145324612, 1145324612, 6976, 1145324612, 1145324612, 6976, 1145324612, 1145324612, 6976, 1145324612, 1145324612, 6976, 1145324612, 1145324612, 6976, 1145324612, 1145324612, 6976, 1145324612, 1145324612, 6976, 1145324612, 1145324612, 6976, 1145324612, 1145324612, 6976, 1145324612, 1145324612, 6976, 1145324612, 1145324612, 7424, 559240, 0, 7424, 559240, 0, 7424, 559240, 0, 7424, 559240, 0, 7424, 559240, 0, 8320, 73, 0, 8320, 73, 0, 8320, 73, 0, 9872, 2, 0, 9888, 2, 0, 11028, 9362, 603979776, 11028, 9362, 603979776, 11028, 9362, 603979776, 11028, 9362, 603979776, 11028, 9362, 603979776, 11028, 9362, 603979776, 11028, 9362, 603979776, 11032, 9362, 603979776, 11032, 9362, 603979776, 11032, 9362, 603979776, 11032, 9362, 603979776, 11032, 9362, 603979776, 11032, 9362, 603979776, 11032, 9362, 603979776, 11044, 9362, 603979776, 11044, 9362, 603979776, 11044, 9362, 603979776, 11044, 9362, 603979776, 11044, 9362, 603979776, 11044, 9362, 603979776, 11044, 9362, 603979776, 11048, 9362, 603979776, 11048, 9362, 603979776, 11048, 9362, 603979776, 11048, 9362, 603979776, 11048, 9362, 603979776, 11048, 9362, 603979776, 11048, 9362, 603979776, 11732, 146, 613548032, 11732, 146, 613548032, 11732, 146, 613548032, 11732, 146, 613548032, 11732, 146, 613548032, 11732, 146, 613548032, 11732, 146, 
613548032, 11732, 146, 613548032, 11736, 146, 613548032, 11736, 146, 613548032, 11736, 146, 613548032, 11736, 146, 613548032, 11736, 146, 613548032, 11736, 146, 613548032, 11736, 146, 613548032, 11736, 146, 613548032, 11748, 146, 613548032, 11748, 146, 613548032, 11748, 146, 613548032, 11748, 146, 613548032, 11748, 146, 613548032, 11748, 146, 613548032, 11748, 146, 613548032, 11748, 146, 613548032, 11752, 146, 613548032, 11752, 146, 613548032, 11752, 146, 613548032, 11752, 146, 613548032, 11752, 146, 613548032, 11752, 146, 613548032, 11752, 146, 613548032, 11752, 146, 613548032, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513, 12352, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467854847522914_857_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467854847522914_857_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e20e4f8f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467854847522914_857_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,302 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 51)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((32 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 59)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((39 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((59 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((77 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((86 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() == 32)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((118 << 6) | 
(counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((164 << 6) | (i6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() >= 59)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (171 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 52)) { + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if ((WaveGetLaneIndex() >= 59)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((216 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter7 == 1)) { + break; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 13))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (249 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result 
+ WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (286 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 504 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1040, 0, 286261248, 1040, 0, 286261248, 1040, 0, 286261248, 1056, 0, 286261248, 1056, 0, 286261248, 1056, 0, 286261248, 1072, 0, 286261248, 1072, 0, 286261248, 1072, 0, 286261248, 2068, 65536, 0, 2084, 65536, 0, 2100, 65536, 0, 4932, 572662306, 572662306, 4932, 572662306, 572662306, 4932, 572662306, 572662306, 4932, 572662306, 572662306, 4932, 572662306, 572662306, 4932, 572662306, 572662306, 4932, 572662306, 572662306, 4932, 572662306, 572662306, 4932, 572662306, 572662306, 4932, 572662306, 572662306, 4932, 572662306, 572662306, 4932, 572662306, 572662306, 4932, 572662306, 572662306, 4932, 572662306, 572662306, 4932, 572662306, 572662306, 4932, 572662306, 572662306, 4936, 572662306, 572662306, 4936, 572662306, 572662306, 4936, 572662306, 572662306, 4936, 572662306, 572662306, 4936, 572662306, 572662306, 4936, 572662306, 572662306, 4936, 572662306, 572662306, 4936, 572662306, 572662306, 4936, 572662306, 572662306, 4936, 572662306, 572662306, 4936, 572662306, 572662306, 4936, 572662306, 572662306, 4936, 572662306, 572662306, 4936, 572662306, 572662306, 4936, 572662306, 
572662306, 4936, 572662306, 572662306, 4948, 572662306, 572662306, 4948, 572662306, 572662306, 4948, 572662306, 572662306, 4948, 572662306, 572662306, 4948, 572662306, 572662306, 4948, 572662306, 572662306, 4948, 572662306, 572662306, 4948, 572662306, 572662306, 4948, 572662306, 572662306, 4948, 572662306, 572662306, 4948, 572662306, 572662306, 4948, 572662306, 572662306, 4948, 572662306, 572662306, 4948, 572662306, 572662306, 4948, 572662306, 572662306, 4948, 572662306, 572662306, 4952, 572662306, 572662306, 4952, 572662306, 572662306, 4952, 572662306, 572662306, 4952, 572662306, 572662306, 4952, 572662306, 572662306, 4952, 572662306, 572662306, 4952, 572662306, 572662306, 4952, 572662306, 572662306, 4952, 572662306, 572662306, 4952, 572662306, 572662306, 4952, 572662306, 572662306, 4952, 572662306, 572662306, 4952, 572662306, 572662306, 4952, 572662306, 572662306, 4952, 572662306, 572662306, 4952, 572662306, 572662306, 6544, 1145324612, 1145324612, 6544, 1145324612, 1145324612, 6544, 1145324612, 1145324612, 6544, 1145324612, 1145324612, 6544, 1145324612, 1145324612, 6544, 1145324612, 1145324612, 6544, 1145324612, 1145324612, 6544, 1145324612, 1145324612, 6544, 1145324612, 1145324612, 6544, 1145324612, 1145324612, 6544, 1145324612, 1145324612, 6544, 1145324612, 1145324612, 6544, 1145324612, 1145324612, 6544, 1145324612, 1145324612, 6544, 1145324612, 1145324612, 6544, 1145324612, 1145324612, 6560, 1145324612, 1145324612, 6560, 1145324612, 1145324612, 6560, 1145324612, 1145324612, 6560, 1145324612, 1145324612, 6560, 1145324612, 1145324612, 6560, 1145324612, 1145324612, 6560, 1145324612, 1145324612, 6560, 1145324612, 1145324612, 6560, 1145324612, 1145324612, 6560, 1145324612, 1145324612, 6560, 1145324612, 1145324612, 6560, 1145324612, 1145324612, 6560, 1145324612, 1145324612, 6560, 1145324612, 1145324612, 6560, 1145324612, 1145324612, 6560, 1145324612, 1145324612, 6576, 1145324612, 1145324612, 6576, 1145324612, 1145324612, 6576, 1145324612, 1145324612, 6576, 
1145324612, 1145324612, 6576, 1145324612, 1145324612, 6576, 1145324612, 1145324612, 6576, 1145324612, 1145324612, 6576, 1145324612, 1145324612, 6576, 1145324612, 1145324612, 6576, 1145324612, 1145324612, 6576, 1145324612, 1145324612, 6576, 1145324612, 1145324612, 6576, 1145324612, 1145324612, 6576, 1145324612, 1145324612, 6576, 1145324612, 1145324612, 6576, 1145324612, 1145324612, 9792, 2184, 2290647040, 9792, 2184, 2290647040, 9792, 2184, 2290647040, 9792, 2184, 2290647040, 9792, 2184, 2290647040, 9792, 2184, 2290647040, 9792, 2184, 2290647040, 9792, 2184, 2290647040, 9808, 2184, 2290647040, 9808, 2184, 2290647040, 9808, 2184, 2290647040, 9808, 2184, 2290647040, 9808, 2184, 2290647040, 9808, 2184, 2290647040, 9808, 2184, 2290647040, 9808, 2184, 2290647040, 10496, 2184, 2290647040, 10496, 2184, 2290647040, 10496, 2184, 2290647040, 10496, 2184, 2290647040, 10496, 2184, 2290647040, 10496, 2184, 2290647040, 10496, 2184, 2290647040, 10496, 2184, 2290647040, 10512, 2184, 2290647040, 10512, 2184, 2290647040, 10512, 2184, 2290647040, 10512, 2184, 2290647040, 10512, 2184, 2290647040, 10512, 2184, 2290647040, 10512, 2184, 2290647040, 10512, 2184, 2290647040, 13840, 1, 0, 17728, 68174084, 1090785345, 17728, 68174084, 1090785345, 17728, 68174084, 1090785345, 17728, 68174084, 1090785345, 17728, 68174084, 1090785345, 17728, 68174084, 1090785345, 17728, 68174084, 1090785345, 17728, 68174084, 1090785345, 17728, 68174084, 1090785345, 17728, 68174084, 1090785345, 17728, 68174084, 1090785345] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + 
Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467920749965663_858_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467920749965663_858_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b3182e5c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467920749965663_858_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: 
_participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467970405585495_860_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467970405585495_860_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d98ed631 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467970405585495_860_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,71 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 26)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((14 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 
67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 896, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 912, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0, 928, 67108863, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756467971486006623_861_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756467971486006623_861_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9e253b31 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756467971486006623_861_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,173 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((15 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 41))) { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((53 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((69 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 13)) { + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 5))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((94 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((113 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((122 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((137 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 231 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 
1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 976, 1431655765, 1431655765, 7252, 0, 524288, 7256, 0, 524288, 7260, 0, 524288, 8784, 0, 524288, 10256, 1049216, 16777472, 10256, 1049216, 16777472, 10256, 1049216, 16777472, 10256, 1049216, 16777472, 10256, 1049216, 16777472, 11728, 134217728, 2147484673, 11728, 134217728, 2147484673, 11728, 134217728, 2147484673, 11728, 134217728, 2147484673, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 
12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765, 12304, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756468184623788143_866_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756468184623788143_866_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5c84406a --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756468184623788143_866_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,179 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (15 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (25 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((48 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + break; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((79 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((88 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((92 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((99 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 147 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5060, 65, 0, 5060, 65, 0, 5064, 65, 0, 5064, 65, 0, 5068, 65, 0, 5068, 65, 0, 5076, 65, 0, 5076, 65, 0, 5080, 65, 0, 5080, 65, 0, 5084, 65, 0, 5084, 65, 0, 5092, 65, 0, 5092, 65, 0, 5096, 65, 0, 5096, 65, 0, 5100, 65, 0, 5100, 65, 0, 6912, 272696336, 68174084, 6912, 272696336, 68174084, 6912, 272696336, 68174084, 6912, 272696336, 68174084, 6912, 272696336, 68174084, 6912, 272696336, 68174084, 6912, 272696336, 68174084, 6912, 272696336, 68174084, 6912, 272696336, 68174084, 6912, 272696336, 68174084, 7232, 613566756, 1227133513, 7232, 613566756, 1227133513, 7232, 613566756, 1227133513, 7232, 613566756, 1227133513, 7232, 613566756, 1227133513, 7232, 613566756, 1227133513, 7232, 613566756, 1227133513, 7232, 613566756, 1227133513, 7232, 613566756, 1227133513, 7232, 613566756, 1227133513, 7232, 613566756, 
1227133513, 7232, 613566756, 1227133513, 7232, 613566756, 1227133513, 7232, 613566756, 1227133513, 7232, 613566756, 1227133513, 7232, 613566756, 1227133513, 7232, 613566756, 1227133513, 7232, 613566756, 1227133513, 7232, 613566756, 1227133513, 7232, 613566756, 1227133513, 7232, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756468206959484881_867_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756468206959484881_867_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9f0d8758 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756468206959484881_867_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,476 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + 
case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 49))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 37)) { + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 18)) { + result = (result + WaveActiveSum(result)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((151 << 6) | (i1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((177 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((201 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((206 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((210 << 6) | (counter3 
<< 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((counter3 == 1)) { + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 58)) { + if ((WaveGetLaneIndex() == 48)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 56)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((285 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((WaveGetLaneIndex() == 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((317 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 31)) { + if ((WaveGetLaneIndex() == 16)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((327 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((338 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + } + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((345 << 6) | (i5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 22))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (379 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 36)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (386 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter6 = 0; + while ((counter6 < 3)) { + counter6 = (counter6 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((402 << 6) | (counter6 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (409 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + 
case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (416 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 222 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [11344, 17, 0, 11344, 17, 0, 12304, 1, 0, 12880, 268501008, 1048832, 12880, 268501008, 1048832, 12880, 268501008, 1048832, 12880, 268501008, 1048832, 12880, 268501008, 1048832, 13200, 1048832, 16781313, 13200, 1048832, 16781313, 13200, 1048832, 16781313, 13200, 1048832, 16781313, 13200, 1048832, 16781313, 6272, 8738, 572653568, 6272, 8738, 572653568, 6272, 8738, 572653568, 6272, 8738, 572653568, 6272, 8738, 572653568, 6272, 8738, 572653568, 6272, 8738, 572653568, 6272, 8738, 572653568, 7440, 34, 570425344, 7440, 34, 570425344, 7440, 34, 570425344, 7440, 34, 570425344, 7456, 34, 570425344, 7456, 34, 570425344, 7456, 34, 570425344, 7456, 34, 570425344, 7472, 34, 570425344, 7472, 34, 570425344, 7472, 34, 570425344, 7472, 34, 570425344, 7744, 1145324612, 1145324612, 7744, 1145324612, 1145324612, 7744, 1145324612, 1145324612, 7744, 1145324612, 1145324612, 7744, 1145324612, 1145324612, 7744, 1145324612, 1145324612, 7744, 1145324612, 1145324612, 7744, 1145324612, 1145324612, 7744, 1145324612, 1145324612, 7744, 1145324612, 1145324612, 7744, 1145324612, 1145324612, 7744, 1145324612, 1145324612, 7744, 1145324612, 1145324612, 7744, 1145324612, 1145324612, 7744, 1145324612, 1145324612, 7744, 1145324612, 1145324612, 768, 1, 0, 1344, 268501008, 1048832, 1344, 268501008, 1048832, 1344, 268501008, 1048832, 1344, 268501008, 1048832, 1344, 268501008, 1048832, 21632, 1092, 
1145307136, 21632, 1092, 1145307136, 21632, 1092, 1145307136, 21632, 1092, 1145307136, 21632, 1092, 1145307136, 21632, 1092, 1145307136, 21632, 1092, 1145307136, 21648, 1092, 1145307136, 21648, 1092, 1145307136, 21648, 1092, 1145307136, 21648, 1092, 1145307136, 21648, 1092, 1145307136, 21648, 1092, 1145307136, 21648, 1092, 1145307136, 26624, 559240, 0, 26624, 559240, 0, 26624, 559240, 0, 26624, 559240, 0, 26624, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756468363430657902_874_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756468363430657902_874_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..503204fc --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756468363430657902_874_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,255 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 51))) { + if (((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveMax(9)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (97 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { 
+ result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() < 25)) { + if ((WaveGetLaneIndex() < 23)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 28))) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (162 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + 
} + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (186 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (191 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (200 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: 
_participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 201 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1792, 1, 0, 4164, 1, 0, 4168, 1, 0, 4172, 1, 0, 4180, 1, 0, 4184, 1, 0, 4188, 1, 0, 8000, 272696336, 68174084, 8000, 272696336, 68174084, 8000, 272696336, 68174084, 8000, 272696336, 68174084, 8000, 272696336, 68174084, 8000, 272696336, 68174084, 8000, 272696336, 68174084, 8000, 272696336, 68174084, 8000, 272696336, 68174084, 8000, 272696336, 68174084, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 12800, 3067833782, 1840700269, 14608, 536903680, 134217744, 14608, 536903680, 134217744, 14608, 536903680, 134217744, 14608, 536903680, 134217744, 14624, 536903680, 134217744, 14624, 536903680, 134217744, 14624, 536903680, 134217744, 
14624, 536903680, 134217744] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756468367804927914_875_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756468367804927914_875_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..957f9c9a --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756468367804927914_875_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,237 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 40)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((30 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + if ((WaveGetLaneIndex() == 0)) { + if ((WaveGetLaneIndex() == 38)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 55))) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveSum(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (107 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 33))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (132 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) 
== 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((165 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((174 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 240 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 3776, 1, 0, 4992, 73, 0, 4992, 73, 0, 4992, 73, 0, 8128, 272696336, 68174084, 8128, 272696336, 68174084, 8128, 272696336, 68174084, 8128, 272696336, 68174084, 8128, 272696336, 68174084, 8128, 272696336, 68174084, 8128, 272696336, 68174084, 8128, 272696336, 68174084, 8128, 272696336, 68174084, 8128, 272696336, 68174084, 9664, 545392672, 136348168, 9664, 545392672, 136348168, 9664, 545392672, 136348168, 9664, 545392672, 136348168, 9664, 545392672, 136348168, 9664, 545392672, 136348168, 9664, 545392672, 136348168, 9664, 545392672, 136348168, 9664, 545392672, 136348168, 9664, 545392672, 136348168, 9680, 545392672, 136348168, 9680, 545392672, 136348168, 9680, 545392672, 136348168, 9680, 545392672, 136348168, 9680, 545392672, 136348168, 9680, 545392672, 136348168, 9680, 545392672, 136348168, 9680, 545392672, 136348168, 9680, 545392672, 136348168, 9680, 545392672, 136348168, 10560, 68174084, 1090785345, 10560, 68174084, 1090785345, 10560, 68174084, 
1090785345, 10560, 68174084, 1090785345, 10560, 68174084, 1090785345, 10560, 68174084, 1090785345, 10560, 68174084, 1090785345, 10560, 68174084, 1090785345, 10560, 68174084, 1090785345, 10560, 68174084, 1090785345, 10560, 68174084, 1090785345, 10576, 68174084, 1090785345, 10576, 68174084, 1090785345, 10576, 68174084, 1090785345, 10576, 68174084, 1090785345, 10576, 68174084, 1090785345, 10576, 68174084, 1090785345, 10576, 68174084, 1090785345, 10576, 68174084, 1090785345, 10576, 68174084, 1090785345, 10576, 68174084, 1090785345, 10576, 68174084, 1090785345, 11136, 545392672, 136348168, 11136, 545392672, 136348168, 11136, 545392672, 136348168, 11136, 545392672, 136348168, 11136, 545392672, 136348168, 11136, 545392672, 136348168, 11136, 545392672, 136348168, 11136, 545392672, 136348168, 11136, 545392672, 136348168, 11136, 545392672, 136348168, 11152, 545392672, 136348168, 11152, 545392672, 136348168, 11152, 545392672, 136348168, 11152, 545392672, 136348168, 11152, 545392672, 136348168, 11152, 545392672, 136348168, 11152, 545392672, 136348168, 11152, 545392672, 136348168, 11152, 545392672, 136348168, 11152, 545392672, 136348168] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756468433718841065_878_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756468433718841065_878_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ab2606d2 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756468433718841065_878_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 55)) { + if ((WaveGetLaneIndex() == 56)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 36))) { + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() < 32)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + 
continue; + } + if ((i0 == 2)) { + break; + } + } + } + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 1024 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 3 + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756468433861728392_879_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756468433861728392_879_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9f14b1eb --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756468433861728392_879_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,106 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 31)) { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 
3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 21 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 2147483648, 0, 1232, 2147483648, 0, 1248, 2147483648, 0, 1856, 85, 0, 1856, 85, 0, 1856, 85, 0, 1856, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756468434181845421_880_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756468434181845421_880_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9e351c9d --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756468434181845421_880_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,138 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 57))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (20 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 52)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 78 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1280, 273, 268435456, 1280, 273, 268435456, 1280, 273, 268435456, 1280, 273, 268435456, 2176, 0, 1048576, 3072, 1145324612, 1145324612, 3072, 1145324612, 1145324612, 3072, 1145324612, 1145324612, 3072, 1145324612, 1145324612, 3072, 1145324612, 1145324612, 3072, 1145324612, 1145324612, 3072, 1145324612, 1145324612, 3072, 1145324612, 1145324612, 3072, 1145324612, 1145324612, 3072, 1145324612, 1145324612, 3072, 1145324612, 1145324612, 3072, 1145324612, 1145324612, 3072, 1145324612, 1145324612, 3072, 1145324612, 1145324612, 3072, 
1145324612, 1145324612, 3072, 1145324612, 1145324612, 3520, 559240, 0, 3520, 559240, 0, 3520, 559240, 0, 3520, 559240, 0, 3520, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756468434668989424_881_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756468434668989424_881_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..f990c83f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756468434668989424_881_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,112 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 45 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1216, 8, 0, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168, 2112, 545392672, 136348168] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756468435058337787_882_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756468435058337787_882_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..48ac3318 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756468435058337787_882_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,90 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((17 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((47 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 981 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1104, 63, 4160749568, 1104, 63, 4160749568, 1104, 63, 4160749568, 1104, 63, 4160749568, 1104, 63, 4160749568, 1104, 63, 4160749568, 1104, 63, 4160749568, 1104, 63, 4160749568, 1104, 63, 4160749568, 1104, 63, 4160749568, 1104, 63, 4160749568, 1120, 63, 4160749568, 1120, 63, 4160749568, 1120, 63, 4160749568, 1120, 63, 4160749568, 1120, 63, 4160749568, 1120, 63, 4160749568, 1120, 63, 4160749568, 1120, 63, 4160749568, 1120, 63, 4160749568, 1120, 63, 4160749568, 1120, 63, 4160749568, 1136, 63, 4160749568, 1136, 63, 4160749568, 1136, 63, 4160749568, 1136, 63, 4160749568, 1136, 63, 4160749568, 1136, 63, 4160749568, 1136, 63, 4160749568, 1136, 63, 4160749568, 1136, 63, 4160749568, 1136, 63, 4160749568, 1136, 63, 4160749568, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2320, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 
4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2324, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2336, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2340, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2352, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 
4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 2356, 2047, 4292870144, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3024, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3028, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 
4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3040, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3044, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3056, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 
4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720, 3060, 32767, 4293918720] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756468446539106998_883_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756468446539106998_883_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e0548be1 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756468446539106998_883_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,302 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 43)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 22))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 6))) { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((130 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 49))) { + if (((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveMin(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((182 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (210 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 16)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((231 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (259 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result 
= (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (264 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (283 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() == 52)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((297 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 43)) { + if ((WaveGetLaneIndex() < 6)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((309 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter2 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (316 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 360 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4736, 2863311530, 2863311530, 4480, 0, 1431654400, 4480, 0, 1431654400, 4480, 0, 1431654400, 4480, 0, 1431654400, 4480, 0, 1431654400, 4480, 0, 1431654400, 4480, 0, 1431654400, 4480, 0, 1431654400, 4480, 0, 1431654400, 4480, 0, 1431654400, 4224, 4194304, 0, 3840, 4, 0, 8320, 64, 65536, 8320, 64, 65536, 8336, 64, 65536, 8336, 64, 65536, 13440, 2097152, 0, 14352, 521, 0, 14352, 521, 0, 14352, 521, 0, 14368, 521, 0, 14368, 521, 0, 14368, 521, 0, 14384, 521, 0, 14384, 521, 0, 14384, 521, 0, 14800, 37385, 0, 14800, 37385, 0, 14800, 37385, 0, 14800, 37385, 0, 14800, 37385, 0, 14816, 37385, 0, 14816, 37385, 0, 14816, 37385, 0, 14816, 
37385, 0, 14816, 37385, 0, 14832, 37385, 0, 14832, 37385, 0, 14832, 37385, 0, 14832, 37385, 0, 14832, 37385, 0, 16000, 0, 2147483648, 16576, 272696336, 68174084, 16576, 272696336, 68174084, 16576, 272696336, 68174084, 16576, 272696336, 68174084, 16576, 272696336, 68174084, 16576, 272696336, 68174084, 16576, 272696336, 68174084, 16576, 272696336, 68174084, 16576, 272696336, 68174084, 16576, 272696336, 68174084, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 16896, 613566756, 1227133513, 17536, 73, 0, 17536, 73, 0, 17536, 73, 0, 18112, 272696336, 68174084, 18112, 272696336, 68174084, 18112, 272696336, 68174084, 18112, 272696336, 68174084, 18112, 272696336, 68174084, 18112, 272696336, 68174084, 18112, 272696336, 68174084, 18112, 272696336, 68174084, 18112, 272696336, 68174084, 18112, 272696336, 68174084, 19024, 0, 1048576, 19040, 0, 1048576] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756468472756596749_884_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756468472756596749_884_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a4642802 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756468472756596749_884_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,147 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 33)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 41)) { + if ((WaveGetLaneIndex() >= 49)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (24 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 62)) { + if ((WaveGetLaneIndex() == 33)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 34)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 48)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 93 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3776, 272696336, 68174084, 3776, 272696336, 68174084, 3776, 272696336, 68174084, 3776, 272696336, 68174084, 3776, 272696336, 68174084, 3776, 272696336, 68174084, 3776, 272696336, 68174084, 3776, 272696336, 
68174084, 3776, 272696336, 68174084, 3776, 272696336, 68174084, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513, 4096, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756468595964106564_888_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756468595964106564_888_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..33974a19 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756468595964106564_888_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,242 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + 
case 1: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 59)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 54))) { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 49))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (120 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 25)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((169 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((178 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 3536, 2147483648, 0, 3552, 2147483648, 0, 3568, 2147483648, 0, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 6272, 2454267026, 613566756, 7680, 4, 1227096064, 7680, 4, 1227096064, 7680, 4, 1227096064, 7680, 4, 1227096064, 7680, 4, 1227096064, 7680, 4, 1227096064, 8384, 292, 1227096064, 8384, 292, 1227096064, 8384, 292, 1227096064, 8384, 292, 1227096064, 8384, 292, 1227096064, 8384, 292, 1227096064, 8384, 292, 1227096064, 8384, 292, 1227096064, 8832, 9568256, 0, 8832, 9568256, 0, 8832, 9568256, 0, 9872, 545390592, 33288, 9872, 545390592, 33288, 9872, 545390592, 33288, 9872, 545390592, 33288, 9872, 545390592, 33288, 9872, 545390592, 33288, 9888, 545390592, 
33288, 9888, 545390592, 33288, 9888, 545390592, 33288, 9888, 545390592, 33288, 9888, 545390592, 33288, 9888, 545390592, 33288, 11408, 68157440, 4161, 11408, 68157440, 4161, 11408, 68157440, 4161, 11408, 68157440, 4161, 11408, 68157440, 4161, 11424, 68157440, 4161, 11424, 68157440, 4161, 11424, 68157440, 4161, 11424, 68157440, 4161, 11424, 68157440, 4161] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469237714554377_893_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469237714554377_893_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..5c09bad5 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469237714554377_893_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,288 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 18)) { + if ((WaveGetLaneIndex() < 32)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (35 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() == 6)) { + if ((WaveGetLaneIndex() == 45)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 30))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { 
+ if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() == 49)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 52)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((133 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 24)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((140 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 51))) { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 34)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((204 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 37)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (((219 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + if ((i3 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 21)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((232 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 168 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [768, 37449, 0, 768, 37449, 0, 768, 37449, 0, 768, 37449, 0, 768, 37449, 0, 768, 37449, 0, 1408, 65, 0, 1408, 65, 0, 8532, 0, 613416960, 8532, 0, 613416960, 8532, 0, 613416960, 8532, 0, 613416960, 8536, 0, 613416960, 8536, 0, 613416960, 8536, 0, 613416960, 8536, 0, 613416960, 8548, 0, 613416960, 8548, 0, 613416960, 8548, 0, 613416960, 8548, 0, 613416960, 8552, 0, 613416960, 8552, 0, 613416960, 8552, 0, 613416960, 8552, 0, 613416960, 8980, 4793490, 0, 8980, 4793490, 0, 8980, 4793490, 0, 8980, 4793490, 0, 8980, 4793490, 0, 8980, 4793490, 0, 8980, 4793490, 0, 8980, 4793490, 0, 8984, 4793490, 0, 8984, 4793490, 0, 8984, 4793490, 0, 8984, 4793490, 0, 8984, 4793490, 0, 8984, 4793490, 0, 8984, 4793490, 0, 8984, 4793490, 0, 8996, 4793490, 0, 8996, 4793490, 0, 8996, 4793490, 0, 8996, 
4793490, 0, 8996, 4793490, 0, 8996, 4793490, 0, 8996, 4793490, 0, 8996, 4793490, 0, 9000, 4793490, 0, 9000, 4793490, 0, 9000, 4793490, 0, 9000, 4793490, 0, 9000, 4793490, 0, 9000, 4793490, 0, 9000, 4793490, 0, 9000, 4793490, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469240664039030_894_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469240664039030_894_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cf0ae032 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469240664039030_894_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,183 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = 
(i1 + 1)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 60))) { + if (((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((60 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((70 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((79 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; 
+ InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((91 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((95 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((104 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 234 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 136348168, 2181570690, 1216, 136348168, 2181570690, 1216, 136348168, 2181570690, 1216, 136348168, 2181570690, 1216, 136348168, 2181570690, 1216, 136348168, 2181570690, 1216, 136348168, 2181570690, 1216, 136348168, 2181570690, 1216, 136348168, 2181570690, 1216, 136348168, 2181570690, 1216, 136348168, 2181570690, 1232, 136348168, 2181570690, 1232, 136348168, 2181570690, 1232, 136348168, 2181570690, 1232, 136348168, 2181570690, 1232, 
136348168, 2181570690, 1232, 136348168, 2181570690, 1232, 136348168, 2181570690, 1232, 136348168, 2181570690, 1232, 136348168, 2181570690, 1232, 136348168, 2181570690, 1232, 136348168, 2181570690, 1248, 136348168, 2181570690, 1248, 136348168, 2181570690, 1248, 136348168, 2181570690, 1248, 136348168, 2181570690, 1248, 136348168, 2181570690, 1248, 136348168, 2181570690, 1248, 136348168, 2181570690, 1248, 136348168, 2181570690, 1248, 136348168, 2181570690, 1248, 136348168, 2181570690, 1248, 136348168, 2181570690, 4480, 16, 0, 4496, 16, 0, 4512, 16, 0, 5376, 4195328, 67125252, 5376, 4195328, 67125252, 5376, 4195328, 67125252, 5376, 4195328, 67125252, 5376, 4195328, 67125252, 5392, 4195328, 67125252, 5392, 4195328, 67125252, 5392, 4195328, 67125252, 5392, 4195328, 67125252, 5392, 4195328, 67125252, 5408, 4195328, 67125252, 5408, 4195328, 67125252, 5408, 4195328, 67125252, 5408, 4195328, 67125252, 5408, 4195328, 67125252, 5824, 524416, 0, 5824, 524416, 0, 5840, 524416, 0, 5840, 524416, 0, 5856, 524416, 0, 5856, 524416, 0, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513, 7168, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + 
VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469248825502730_895_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469248825502730_895_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..082dcf67 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469248825502730_895_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,235 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Append log: each participating lane writes a 3-word record (tag, ballot.x, ballot.y). */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the shared write cursor, advanced by 3 words per record via InterlockedAdd. */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Every lane that reaches a wave op logs which lanes were active with it; tag = (wave-op id << 6) | (outer loop counter << 4) | (inner loop counter << 2). */ + uint result = 0; + if ((WaveGetLaneIndex() >= 55)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 47)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((66 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 2))) { + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((92 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((122 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 38))) { + 
result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((137 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((152 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 59)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((159 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 44))) { + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 2))) { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 32))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((246 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((261 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 144 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2688, 0, 4286578688, 2688, 0, 4286578688, 2688, 0, 4286578688, 2688, 0, 4286578688, 2688, 0, 4286578688, 2688, 0, 4286578688, 2688, 0, 4286578688, 2688, 0, 
4286578688, 2688, 0, 4286578688, 2304, 1073872960, 525312, 2304, 1073872960, 525312, 2304, 1073872960, 525312, 2304, 1073872960, 525312, 2304, 1073872960, 525312, 1920, 1983, 0, 1920, 1983, 0, 1920, 1983, 0, 1920, 1983, 0, 1920, 1983, 0, 1920, 1983, 0, 1920, 1983, 0, 1920, 1983, 0, 1920, 1983, 0, 1920, 1983, 0, 3328, 73, 0, 3328, 73, 0, 3328, 73, 0, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513, 10496, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469276255827097_897_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469276255827097_897_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b3182e5c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469276255827097_897_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Append log: each participating lane writes a 3-word record (tag, ballot.x, ballot.y). */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the shared write cursor, advanced by 3 words per record via InterlockedAdd. */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Every lane that reaches a wave op logs which lanes were active with it; tag = (wave-op id << 6). */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { /* Never true here: case 1 holds only odd lanes, so no (18 << 6) record is expected. */ + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + 
Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469276476392709_898_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469276476392709_898_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a959724e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469276476392709_898_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,363 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Append log: each participating lane writes a 3-word record (tag, ballot.x, ballot.y). */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the shared write cursor, advanced by 3 words per record via InterlockedAdd. */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Every lane that reaches a wave op logs which lanes were active with it; tag = (wave-op id << 6) | (outer loop counter << 4) | (inner loop counter << 2). */ + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((69 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 55))) { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((129 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i1 == 1)) { + continue; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 60))) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (225 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 51))) { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((271 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (282 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (292 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (301 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + 
WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (325 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 480 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1792, 272696336, 68174084, 1792, 272696336, 68174084, 1792, 272696336, 68174084, 1792, 272696336, 68174084, 1792, 272696336, 68174084, 1792, 272696336, 68174084, 1792, 272696336, 68174084, 1792, 272696336, 68174084, 1792, 272696336, 68174084, 1792, 272696336, 68174084, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 2432, 613566756, 1227133513, 3712, 136348168, 2181570690, 3712, 136348168, 2181570690, 3712, 136348168, 2181570690, 3712, 136348168, 2181570690, 3712, 136348168, 
2181570690, 3712, 136348168, 2181570690, 3712, 136348168, 2181570690, 3712, 136348168, 2181570690, 3712, 136348168, 2181570690, 3712, 136348168, 2181570690, 3712, 136348168, 2181570690, 3728, 136348168, 2181570690, 3728, 136348168, 2181570690, 3728, 136348168, 2181570690, 3728, 136348168, 2181570690, 3728, 136348168, 2181570690, 3728, 136348168, 2181570690, 3728, 136348168, 2181570690, 3728, 136348168, 2181570690, 3728, 136348168, 2181570690, 3728, 136348168, 2181570690, 3728, 136348168, 2181570690, 4416, 136348168, 2181570690, 4416, 136348168, 2181570690, 4416, 136348168, 2181570690, 4416, 136348168, 2181570690, 4416, 136348168, 2181570690, 4416, 136348168, 2181570690, 4416, 136348168, 2181570690, 4416, 136348168, 2181570690, 4416, 136348168, 2181570690, 4416, 136348168, 2181570690, 4416, 136348168, 2181570690, 4432, 136348168, 2181570690, 4432, 136348168, 2181570690, 4432, 136348168, 2181570690, 4432, 136348168, 2181570690, 4432, 136348168, 2181570690, 4432, 136348168, 2181570690, 4432, 136348168, 2181570690, 4432, 136348168, 2181570690, 4432, 136348168, 2181570690, 4432, 136348168, 2181570690, 4432, 136348168, 2181570690, 4992, 272696336, 68174084, 4992, 272696336, 68174084, 4992, 272696336, 68174084, 4992, 272696336, 68174084, 4992, 272696336, 68174084, 4992, 272696336, 68174084, 4992, 272696336, 68174084, 4992, 272696336, 68174084, 4992, 272696336, 68174084, 4992, 272696336, 68174084, 11264, 4681, 2415919104, 11264, 4681, 2415919104, 11264, 4681, 2415919104, 11264, 4681, 2415919104, 11264, 4681, 2415919104, 11264, 4681, 2415919104, 11264, 4681, 2415919104, 12432, 9, 2415919104, 12432, 9, 2415919104, 12432, 9, 2415919104, 12432, 9, 2415919104, 12448, 9, 2415919104, 12448, 9, 2415919104, 12448, 9, 2415919104, 12448, 9, 2415919104, 12464, 9, 2415919104, 12464, 9, 2415919104, 12464, 9, 2415919104, 12464, 9, 2415919104, 13136, 4681, 2415919104, 13136, 4681, 2415919104, 13136, 4681, 2415919104, 13136, 4681, 2415919104, 13136, 4681, 2415919104, 13136, 4681, 
2415919104, 13136, 4681, 2415919104, 13152, 4681, 2415919104, 13152, 4681, 2415919104, 13152, 4681, 2415919104, 13152, 4681, 2415919104, 13152, 4681, 2415919104, 13152, 4681, 2415919104, 13152, 4681, 2415919104, 13168, 4681, 2415919104, 13168, 4681, 2415919104, 13168, 4681, 2415919104, 13168, 4681, 2415919104, 13168, 4681, 2415919104, 13168, 4681, 2415919104, 13168, 4681, 2415919104, 13824, 585, 2147483648, 13824, 585, 2147483648, 13824, 585, 2147483648, 13824, 585, 2147483648, 13824, 585, 2147483648, 14400, 272696336, 68174084, 14400, 272696336, 68174084, 14400, 272696336, 68174084, 14400, 272696336, 68174084, 14400, 272696336, 68174084, 14400, 272696336, 68174084, 14400, 272696336, 68174084, 14400, 272696336, 68174084, 14400, 272696336, 68174084, 14400, 272696336, 68174084, 17360, 0, 16777216, 17376, 0, 16777216, 17392, 0, 16777216, 18048, 1048576, 0, 19584, 613548032, 299593, 19584, 613548032, 299593, 19584, 613548032, 299593, 19584, 613548032, 299593, 19584, 613548032, 299593, 19584, 613548032, 299593, 19584, 613548032, 299593, 19584, 613548032, 299593, 19584, 613548032, 299593, 19584, 613548032, 299593, 19584, 613548032, 299593, 19584, 613548032, 299593, 20800, 0, 1] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469325648577214_901_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469325648577214_901_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b75e6762 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469325648577214_901_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,184 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* Append log: each participating lane writes a 3-word record (tag, ballot.x, ballot.y). */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* Element 0 is the shared write cursor, advanced by 3 words per record via InterlockedAdd. */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Every lane that reaches a wave op logs which lanes were active with it; tag = (wave-op id << 6) | (loop counter << 4). */ + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 15)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 30)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((25 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if 
(((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 57))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveSum(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 23))) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (128 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((143 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 177 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1168, 21845, 0, 1168, 21845, 0, 1168, 21845, 0, 1168, 21845, 0, 1168, 21845, 0, 1168, 21845, 0, 1168, 21845, 0, 1168, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1184, 21845, 0, 1616, 357913941, 0, 1616, 357913941, 0, 1616, 357913941, 0, 1616, 357913941, 0, 1616, 357913941, 0, 1616, 357913941, 0, 1616, 357913941, 0, 1616, 357913941, 0, 1616, 357913941, 0, 1616, 357913941, 0, 1616, 357913941, 0, 1616, 357913941, 0, 1616, 357913941, 0, 1616, 357913941, 0, 1616, 357913941, 0, 1632, 357913941, 0, 1632, 357913941, 0, 1632, 357913941, 0, 1632, 357913941, 0, 1632, 
357913941, 0, 1632, 357913941, 0, 1632, 357913941, 0, 1632, 357913941, 0, 1632, 357913941, 0, 1632, 357913941, 0, 1632, 357913941, 0, 1632, 357913941, 0, 1632, 357913941, 0, 1632, 357913941, 0, 1632, 357913941, 0, 3520, 21, 1409286144, 3520, 21, 1409286144, 3520, 21, 1409286144, 3520, 21, 1409286144, 3520, 21, 1409286144, 3520, 21, 1409286144, 7744, 21, 1409286144, 7744, 21, 1409286144, 7744, 21, 1409286144, 7744, 21, 1409286144, 7744, 21, 1409286144, 7744, 21, 1409286144, 8192, 268435456, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469414023018922_904_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469414023018922_904_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..994ad7ad --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469414023018922_904_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 51))) { 
+ result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 31)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 34)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 294 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 
2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2688, 2863311530, 2863311530, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 2304, 1431655765, 1431655765, 5184, 131328, 524800, 5184, 131328, 524800, 5184, 131328, 524800, 5184, 131328, 524800, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4800, 2147352319, 0, 4416, 0, 4] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: 
BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469414595447311_905_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469414595447311_905_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e8ce2ae2 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469414595447311_905_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 69 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0, 1920, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469414962376829_906_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469414962376829_906_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..990389f0 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469414962376829_906_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,300 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 57)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((22 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((36 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; 
+ } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 27)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((68 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 51))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((100 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((115 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = 
ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((141 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((150 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 45)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 
0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() == 61)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((190 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((197 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (214 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (223 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 198 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [9040, 2181570690, 545392672, 9040, 2181570690, 545392672, 9040, 2181570690, 545392672, 9040, 2181570690, 545392672, 9040, 2181570690, 545392672, 9040, 2181570690, 545392672, 9040, 2181570690, 545392672, 9040, 2181570690, 545392672, 9040, 2181570690, 545392672, 9040, 2181570690, 545392672, 9040, 2181570690, 545392672, 9616, 272696336, 68174084, 9616, 272696336, 68174084, 9616, 272696336, 68174084, 9616, 272696336, 68174084, 9616, 272696336, 68174084, 9616, 272696336, 68174084, 9616, 272696336, 68174084, 9616, 272696336, 68174084, 9616, 272696336, 68174084, 9616, 272696336, 68174084, 13696, 73, 0, 13696, 73, 0, 13696, 73, 0, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14272, 1363481681, 340870420, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 
1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469666712933436_908_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469666712933436_908_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cac51aea --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469666712933436_908_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + 
if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469666953250375_909_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469666953250375_909_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b08c2dac --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469666953250375_909_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,264 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 25)) || 
(WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (86 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 40))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 48))) { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 28)) || 
(WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 13))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((188 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((209 << 6) | (counter1 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() >= 49)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((224 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (228 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (243 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 43))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 243 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 
68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 4608, 17, 0, 4608, 17, 0, 5184, 286331153, 286331153, 5184, 286331153, 286331153, 5184, 286331153, 286331153, 5184, 286331153, 286331153, 5184, 286331153, 286331153, 5184, 286331153, 286331153, 5184, 286331153, 286331153, 5184, 286331153, 286331153, 5184, 286331153, 286331153, 5184, 286331153, 286331153, 5184, 286331153, 286331153, 5184, 286331153, 286331153, 5184, 286331153, 286331153, 5184, 286331153, 286331153, 5184, 286331153, 286331153, 5184, 286331153, 286331153, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 
2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 5504, 2004318071, 2004318071, 16512, 128, 2048, 16512, 128, 2048] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469669421531563_910_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469669421531563_910_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d8ca7ecc --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469669421531563_910_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,262 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 28)) { + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMax(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((27 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((41 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((48 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } + if ((WaveGetLaneIndex() >= 58)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + 
uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((119 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 56))) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (142 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 4) || 
(WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 27)) { + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 42)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((187 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((198 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 
+ Size: 528 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 1, 0, 1744, 63, 0, 1744, 63, 0, 1744, 63, 0, 1744, 63, 0, 1744, 63, 0, 1744, 63, 0, 1760, 63, 0, 1760, 63, 0, 1760, 63, 0, 1760, 63, 0, 1760, 63, 0, 1760, 63, 0, 1776, 63, 0, 1776, 63, 0, 1776, 63, 0, 1776, 63, 0, 1776, 63, 0, 1776, 63, 0, 2644, 1023, 0, 2644, 1023, 0, 2644, 1023, 0, 2644, 1023, 0, 2644, 1023, 0, 2644, 1023, 0, 2644, 1023, 0, 2644, 1023, 0, 2644, 1023, 0, 2644, 1023, 0, 2648, 1023, 0, 2648, 1023, 0, 2648, 1023, 0, 2648, 1023, 0, 2648, 1023, 0, 2648, 1023, 0, 2648, 1023, 0, 2648, 1023, 0, 2648, 1023, 0, 2648, 1023, 0, 2660, 1023, 0, 2660, 1023, 0, 2660, 1023, 0, 2660, 1023, 0, 2660, 1023, 0, 2660, 1023, 0, 2660, 1023, 0, 2660, 1023, 0, 2660, 1023, 0, 2660, 1023, 0, 2664, 1023, 0, 2664, 1023, 0, 2664, 1023, 0, 2664, 1023, 0, 2664, 1023, 0, 2664, 1023, 0, 2664, 1023, 0, 2664, 1023, 0, 2664, 1023, 0, 2664, 1023, 0, 2676, 1023, 0, 2676, 1023, 0, 2676, 1023, 0, 2676, 1023, 0, 2676, 1023, 0, 2676, 1023, 0, 2676, 1023, 0, 2676, 1023, 0, 2676, 1023, 0, 2676, 1023, 0, 2680, 1023, 0, 2680, 1023, 0, 2680, 1023, 0, 2680, 1023, 0, 2680, 1023, 0, 2680, 1023, 0, 2680, 1023, 0, 2680, 1023, 0, 2680, 1023, 0, 2680, 1023, 0, 3092, 511, 0, 3092, 511, 0, 3092, 511, 0, 3092, 511, 0, 3092, 511, 0, 3092, 511, 0, 3092, 511, 0, 3092, 511, 0, 3092, 511, 0, 3096, 511, 0, 3096, 511, 0, 3096, 511, 0, 3096, 511, 0, 3096, 511, 0, 3096, 511, 0, 3096, 511, 0, 3096, 511, 0, 3096, 511, 0, 3108, 511, 0, 3108, 511, 0, 3108, 511, 0, 3108, 511, 0, 3108, 511, 0, 3108, 511, 0, 3108, 511, 0, 3108, 511, 0, 3108, 511, 0, 3112, 511, 0, 3112, 511, 0, 3112, 511, 0, 3112, 511, 0, 3112, 511, 0, 3112, 511, 0, 3112, 511, 0, 3112, 511, 0, 3112, 511, 0, 3124, 511, 0, 3124, 511, 0, 3124, 511, 0, 3124, 511, 0, 3124, 511, 0, 3124, 511, 0, 3124, 511, 0, 3124, 511, 0, 3124, 511, 0, 3128, 511, 0, 3128, 511, 0, 3128, 511, 0, 3128, 511, 0, 3128, 511, 0, 3128, 511, 0, 3128, 511, 0, 3128, 511, 0, 3128, 511, 0, 4544, 65, 0, 
4544, 65, 0, 5120, 272696336, 68174084, 5120, 272696336, 68174084, 5120, 272696336, 68174084, 5120, 272696336, 68174084, 5120, 272696336, 68174084, 5120, 272696336, 68174084, 5120, 272696336, 68174084, 5120, 272696336, 68174084, 5120, 272696336, 68174084, 5120, 272696336, 68174084, 7936, 68174084, 1090785345, 7936, 68174084, 1090785345, 7936, 68174084, 1090785345, 7936, 68174084, 1090785345, 7936, 68174084, 1090785345, 7936, 68174084, 1090785345, 7936, 68174084, 1090785345, 7936, 68174084, 1090785345, 7936, 68174084, 1090785345, 7936, 68174084, 1090785345, 7936, 68174084, 1090785345, 9088, 10, 2852126720, 9088, 10, 2852126720, 9088, 10, 2852126720, 9088, 10, 2852126720, 9088, 10, 2852126720, 9088, 10, 2852126720, 10304, 10, 2852126720, 10304, 10, 2852126720, 10304, 10, 2852126720, 10304, 10, 2852126720, 10304, 10, 2852126720, 10304, 10, 2852126720, 10320, 10, 2852126720, 10320, 10, 2852126720, 10320, 10, 2852126720, 10320, 10, 2852126720, 10320, 10, 2852126720, 10320, 10, 2852126720, 12672, 0, 2147483648, 12688, 0, 2147483648] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469703813866764_911_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469703813866764_911_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6fe02ec6 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469703813866764_911_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,137 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 0))) { + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((82 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 6144, 613566756, 1227133513, 6144, 613566756, 1227133513, 6144, 613566756, 1227133513, 6144, 613566756, 1227133513, 6144, 613566756, 1227133513, 6144, 613566756, 
1227133513, 6144, 613566756, 1227133513, 6144, 613566756, 1227133513, 6144, 613566756, 1227133513, 6144, 613566756, 1227133513, 6144, 613566756, 1227133513, 6144, 613566756, 1227133513, 6144, 613566756, 1227133513, 6144, 613566756, 1227133513, 6144, 613566756, 1227133513, 6144, 613566756, 1227133513, 6144, 613566756, 1227133513, 6144, 613566756, 1227133513, 6144, 613566756, 1227133513, 6144, 613566756, 1227133513, 6144, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469704254856492_912_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469704254856492_912_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b7420cba --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469704254856492_912_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,341 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 54)) { + if ((WaveGetLaneIndex() == 25)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((40 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((47 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 35))) { + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (84 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot 
= WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (140 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((161 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((171 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((180 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((185 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((192 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((196 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((228 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 1)) { + break; + } + } + break; + } + case 3: { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 50))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 44))) { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 52))) { + 
result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((306 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (320 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (339 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5376, 0, 1048576, 14608, 4, 67371008, 14608, 4, 67371008, 14608, 4, 67371008] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469789318860667_914_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469789318860667_914_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..35d7af10 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469789318860667_914_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,206 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((((WaveGetLaneIndex() == 26) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 34))) { + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (32 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (51 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] 
= ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 37))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (79 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 48))) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (108 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 56)) { + if ((WaveGetLaneIndex() >= 32)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((147 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 84 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3264, 0, 4, 3584, 67108864, 0, 6272, 1073741824, 65536, 6272, 1073741824, 65536, 6912, 17, 0, 6912, 17, 0, 7808, 1078215748, 1145324608, 7808, 1078215748, 1145324608, 7808, 1078215748, 1145324608, 7808, 1078215748, 1145324608, 7808, 1078215748, 1145324608, 7808, 1078215748, 1145324608, 7808, 1078215748, 1145324608, 7808, 1078215748, 1145324608, 7808, 1078215748, 1145324608, 7808, 1078215748, 1145324608, 7808, 1078215748, 1145324608, 7808, 
1078215748, 1145324608, 7808, 1078215748, 1145324608, 7808, 1078215748, 1145324608, 9408, 0, 2852126720, 9408, 0, 2852126720, 9408, 0, 2852126720, 9408, 0, 2852126720, 9424, 0, 2852126720, 9424, 0, 2852126720, 9424, 0, 2852126720, 9424, 0, 2852126720] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469797929376202_917_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469797929376202_917_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..18390f5e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469797929376202_917_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,137 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() == 15)) { + if 
((WaveGetLaneIndex() == 39)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (22 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (31 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 54))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (60 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 102 + - Name: 
expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1984, 272696336, 68174084, 1984, 272696336, 68174084, 1984, 272696336, 68174084, 1984, 272696336, 68174084, 1984, 272696336, 68174084, 1984, 272696336, 68174084, 1984, 272696336, 68174084, 1984, 272696336, 68174084, 1984, 272696336, 68174084, 1984, 272696336, 68174084, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513, 4160, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469798360489429_918_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469798360489429_918_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..bf42fdb0 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469798360489429_918_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,222 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() & 1) == 0)) { + if ((WaveGetLaneIndex() == 24)) { + if ((WaveGetLaneIndex() == 60)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (14 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } 
+ if ((i1 == 1)) { + continue; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 50)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 58))) { + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 15))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 55))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) 
{ + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + } else { + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((203 << 6) | (counter2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + continue; + } + } + if ((counter2 == 2)) { 
+ break; + } + } + if ((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 159 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1936, 16777216, 0, 1952, 16777216, 0, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 4928, 1431655765, 1431655765, 8256, 64, 201326592, 8256, 64, 201326592, 8256, 64, 201326592, 8896, 64, 0, 11200, 134218240, 1073774592, 11200, 134218240, 1073774592, 11200, 134218240, 1073774592, 11200, 134218240, 1073774592, 13008, 128, 32768, 13008, 128, 32768, 13012, 128, 32768, 13012, 128, 32768, 13024, 128, 32768, 13024, 128, 
32768, 13028, 128, 32768, 13028, 128, 32768, 14336, 8, 1073745920, 14336, 8, 1073745920, 14336, 8, 1073745920] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469832313649030_919_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469832313649030_919_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..28703b13 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469832313649030_919_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,116 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 
6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((51 << 6) | (i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 2028 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 
2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1664, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1680, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 
2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 1696, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2692, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 
2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2696, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2700, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 
2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2708, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2712, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 
2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2716, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2724, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 
2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2728, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 2732, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 
2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3268, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3272, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 
2863311530, 3276, 2863311530, 2863311530, 3276, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3284, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3288, 2863311530, 2863311530, 3292, 
2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3292, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3300, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 
2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3304, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530, 3308, 2863311530, 2863311530] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 
+ Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469876177646350_922_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469876177646350_922_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ce70e589 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469876177646350_922_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,159 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint 
counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((54 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 41))) { + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 183 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1232, 299593, 0, 1232, 299593, 0, 1232, 299593, 0, 1232, 299593, 0, 1232, 299593, 0, 1232, 299593, 0, 1232, 299593, 0, 1248, 299593, 0, 1248, 299593, 0, 1248, 299593, 0, 1248, 299593, 0, 1248, 299593, 0, 1248, 299593, 0, 1248, 299593, 0, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 4224, 1363481681, 340870420, 5392, 0, 32768, 5408, 0, 32768, 7760, 18724, 1227132928, 7760, 18724, 1227132928, 7760, 18724, 1227132928, 7760, 18724, 1227132928, 7760, 18724, 1227132928, 7760, 18724, 1227132928, 7760, 18724, 1227132928, 7760, 18724, 1227132928, 7760, 18724, 1227132928, 7760, 18724, 1227132928, 7760, 18724, 1227132928, 7760, 18724, 1227132928, 7776, 18724, 1227132928, 7776, 18724, 1227132928, 7776, 18724, 1227132928, 7776, 18724, 1227132928, 7776, 18724, 1227132928, 7776, 18724, 1227132928, 7776, 18724, 1227132928, 7776, 18724, 1227132928, 7776, 18724, 1227132928, 7776, 18724, 1227132928, 7776, 18724, 1227132928, 
7776, 18724, 1227132928] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469879716428254_923_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469879716428254_923_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d7bb4da5 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469879716428254_923_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,121 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((28 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 11)) { + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((38 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 45)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((45 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 1)) { + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 240 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 170, 2860515328, 1792, 170, 2860515328, 1792, 170, 2860515328, 1792, 170, 2860515328, 1792, 170, 2860515328, 1792, 170, 2860515328, 1792, 170, 2860515328, 1792, 170, 2860515328, 1792, 170, 2860515328, 1808, 170, 2860515328, 1808, 170, 2860515328, 1808, 170, 2860515328, 1808, 170, 2860515328, 1808, 170, 2860515328, 1808, 170, 2860515328, 1808, 170, 2860515328, 1808, 170, 2860515328, 1808, 170, 
2860515328, 2432, 682, 0, 2432, 682, 0, 2432, 682, 0, 2432, 682, 0, 2432, 682, 0, 2448, 682, 0, 2448, 682, 0, 2448, 682, 0, 2448, 682, 0, 2448, 682, 0, 2880, 0, 2863308800, 2880, 0, 2863308800, 2880, 0, 2863308800, 2880, 0, 2863308800, 2880, 0, 2863308800, 2880, 0, 2863308800, 2880, 0, 2863308800, 2880, 0, 2863308800, 2880, 0, 2863308800, 2880, 0, 2863308800, 2896, 0, 2863308800, 2896, 0, 2863308800, 2896, 0, 2863308800, 2896, 0, 2863308800, 2896, 0, 2863308800, 2896, 0, 2863308800, 2896, 0, 2863308800, 2896, 0, 2863308800, 2896, 0, 2863308800, 2896, 0, 2863308800, 3584, 10922, 2863136768, 3584, 10922, 2863136768, 3584, 10922, 2863136768, 3584, 10922, 2863136768, 3584, 10922, 2863136768, 3584, 10922, 2863136768, 3584, 10922, 2863136768, 3584, 10922, 2863136768, 3584, 10922, 2863136768, 3584, 10922, 2863136768, 3584, 10922, 2863136768, 3584, 10922, 2863136768, 3584, 10922, 2863136768, 3584, 10922, 2863136768, 3600, 10922, 2863136768, 3600, 10922, 2863136768, 3600, 10922, 2863136768, 3600, 10922, 2863136768, 3600, 10922, 2863136768, 3600, 10922, 2863136768, 3600, 10922, 2863136768, 3600, 10922, 2863136768, 3600, 10922, 2863136768, 3600, 10922, 2863136768, 3600, 10922, 2863136768, 3600, 10922, 2863136768, 3600, 10922, 2863136768, 3600, 10922, 2863136768] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756469881270725247_924_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756469881270725247_924_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7ac5c286 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756469881270725247_924_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,264 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (34 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((56 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 57))) { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((97 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((116 << 6) | 
(i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + } else { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 53))) { + if (((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((160 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((173 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((i0 == 2)) { + break; + } + } + break; + } + case 1: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((191 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + uint counter3 = 0; + while ((counter3 < 
2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((212 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 31)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((219 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (227 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 219 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 
1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1920, 838860, 0, 1920, 838860, 0, 1920, 838860, 0, 1920, 838860, 0, 1920, 838860, 0, 1920, 838860, 0, 1920, 838860, 0, 1920, 838860, 0, 1920, 838860, 0, 1920, 838860, 0, 3584, 69905, 0, 3584, 69905, 0, 3584, 69905, 0, 3584, 69905, 0, 3584, 69905, 0, 3600, 69905, 0, 3600, 69905, 0, 3600, 69905, 0, 3600, 69905, 0, 3600, 69905, 0, 3616, 69905, 0, 3616, 69905, 0, 3616, 69905, 0, 3616, 69905, 0, 3616, 69905, 0, 4736, 17, 268435456, 4736, 17, 268435456, 4736, 17, 268435456, 4752, 17, 268435456, 4752, 17, 268435456, 4752, 17, 268435456, 4768, 17, 268435456, 4768, 17, 268435456, 4768, 17, 268435456, 14528, 1145324612, 1145324612, 14528, 1145324612, 1145324612, 14528, 1145324612, 1145324612, 14528, 1145324612, 1145324612, 14528, 1145324612, 1145324612, 14528, 1145324612, 1145324612, 14528, 1145324612, 1145324612, 14528, 1145324612, 1145324612, 14528, 1145324612, 1145324612, 14528, 1145324612, 1145324612, 14528, 1145324612, 1145324612, 14528, 1145324612, 1145324612, 14528, 1145324612, 1145324612, 14528, 1145324612, 1145324612, 14528, 1145324612, 1145324612, 14528, 1145324612, 1145324612, 14976, 559240, 0, 14976, 559240, 0, 14976, 559240, 0, 14976, 559240, 0, 14976, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470009494101565_927_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470009494101565_927_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8700d094 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470009494101565_927_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,68 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMax(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((18 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 189 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 3221225472, 1152, 524287, 
3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1168, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472, 1184, 524287, 3221225472] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470010269596697_928_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470010269596697_928_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7d84f205 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470010269596697_928_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,167 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 21)) { + if ((WaveGetLaneIndex() == 27)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 
0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (49 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (61 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 41)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 9 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3456, 2097152, 0, 5632, 131072, 0, 5248, 1073741824, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470049971661084_930_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470049971661084_930_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7e0ff961 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470049971661084_930_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,342 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveMax(result)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 63))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((78 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 3)) { + if ((WaveGetLaneIndex() == 54)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((103 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 54)) { + result = (result + 
WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((127 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 51)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + if ((i0 == 2)) { + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((179 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((193 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((202 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 52)) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (215 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 11)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (247 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((262 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 45))) { + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((280 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 23) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((295 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 1)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((302 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + 
} + if ((i2 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (340 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() == 28)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (336 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (330 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 189 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1920, 134217730, 1049664, 1920, 134217730, 1049664, 1920, 134217730, 1049664, 1920, 134217730, 1049664, 1920, 134217730, 1049664, 1936, 134217730, 1049664, 1936, 134217730, 1049664, 1936, 134217730, 1049664, 1936, 134217730, 1049664, 1936, 134217730, 1049664, 1952, 134217730, 1049664, 1952, 134217730, 1049664, 1952, 134217730, 
1049664, 1952, 134217730, 1049664, 1952, 134217730, 1049664, 8128, 134221824, 33554432, 8128, 134221824, 33554432, 8128, 134221824, 33554432, 8144, 134221824, 33554432, 8144, 134221824, 33554432, 8144, 134221824, 33554432, 8160, 134221824, 33554432, 8160, 134221824, 33554432, 8160, 134221824, 33554432, 9088, 131072, 537395200, 9088, 131072, 537395200, 9088, 131072, 537395200, 9104, 131072, 537395200, 9104, 131072, 537395200, 9104, 131072, 537395200, 9120, 131072, 537395200, 9120, 131072, 537395200, 9120, 131072, 537395200, 11472, 16777216, 0, 11488, 16777216, 0, 11504, 16777216, 0, 14656, 0, 67108864, 17920, 4, 1227096064, 17920, 4, 1227096064, 17920, 4, 1227096064, 17920, 4, 1227096064, 17920, 4, 1227096064, 17920, 4, 1227096064, 17936, 4, 1227096064, 17936, 4, 1227096064, 17936, 4, 1227096064, 17936, 4, 1227096064, 17936, 4, 1227096064, 17936, 4, 1227096064, 17952, 4, 1227096064, 17952, 4, 1227096064, 17952, 4, 1227096064, 17952, 4, 1227096064, 17952, 4, 1227096064, 17952, 4, 1227096064, 18880, 8388608, 0, 18896, 8388608, 0, 18912, 8388608, 0, 21760, 131072, 0, 21504, 268435456, 0, 21120, 32768, 2097664, 21120, 32768, 2097664, 21120, 32768, 2097664] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470097274578500_931_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470097274578500_931_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..a8d47744 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470097274578500_931_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,135 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() == 49)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 44))) { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3392, 85, 0, 3392, 85, 0, 3392, 85, 0, 3392, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470100062272548_933_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470100062272548_933_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..40cf5180 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470100062272548_933_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,532 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 50))) { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 55))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (27 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 51))) { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if 
(((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (67 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (76 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 17)) { + if ((WaveGetLaneIndex() < 17)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 19)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (110 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (114 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (123 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 2: { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 44))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 31)) { + if ((WaveGetLaneIndex() == 30)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() >= 35)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (168 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 47))) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 43)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((226 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((240 << 6) | (counter0 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((247 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 9)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((262 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() == 24) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((281 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 2)) { + break; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((291 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (315 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (325 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (334 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (339 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 3: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((362 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + 
result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (371 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 48))) { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 49))) { + result = (result + WaveActiveMax(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (393 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (404 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (413 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((431 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 44))) { + if (((WaveGetLaneIndex() 
< 21) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((449 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((460 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 63)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((467 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 62))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((480 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter4 == 2)) { + break; + } + } + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (487 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 
+ Fill: 0 + Size: 369 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1728, 3, 3061841920, 1728, 3, 3061841920, 1728, 3, 3061841920, 1728, 3, 3061841920, 1728, 3, 3061841920, 1728, 3, 3061841920, 1728, 3, 3061841920, 1728, 3, 3061841920, 4288, 0, 524288, 4864, 2317918856, 141474, 4864, 2317918856, 141474, 4864, 2317918856, 141474, 4864, 2317918856, 141474, 4864, 2317918856, 141474, 4864, 2317918856, 141474, 4864, 2317918856, 141474, 4864, 2317918856, 141474, 4864, 2317918856, 141474, 4864, 2317918856, 141474, 4864, 2317918856, 141474, 4864, 2317918856, 141474, 4864, 2317918856, 141474, 4864, 2317918856, 141474, 4864, 2317918856, 141474, 4864, 2317918856, 141474, 5696, 37448, 0, 5696, 37448, 0, 5696, 37448, 0, 5696, 37448, 0, 5696, 37448, 0, 6144, 37448, 0, 6144, 37448, 0, 6144, 37448, 0, 6144, 37448, 0, 6144, 37448, 0, 6720, 272696336, 16644, 6720, 272696336, 16644, 6720, 272696336, 16644, 6720, 272696336, 16644, 6720, 272696336, 16644, 6720, 272696336, 16644, 6720, 272696336, 16644, 6720, 272696336, 16644, 7872, 1363481680, 83220, 7872, 1363481680, 83220, 7872, 1363481680, 83220, 7872, 1363481680, 83220, 7872, 1363481680, 83220, 7872, 1363481680, 83220, 7872, 1363481680, 83220, 7872, 1363481680, 83220, 7872, 1363481680, 83220, 7872, 1363481680, 83220, 7872, 1363481680, 83220, 7872, 1363481680, 83220, 7872, 1363481680, 83220, 7872, 1363481680, 83220, 7872, 1363481680, 83220, 7872, 1363481680, 83220, 9024, 1, 4278190080, 9024, 1, 4278190080, 9024, 1, 4278190080, 9024, 1, 4278190080, 9024, 1, 4278190080, 9024, 1, 4278190080, 9024, 1, 4278190080, 9024, 1, 4278190080, 9024, 1, 4278190080, 11456, 1, 4294963200, 11456, 1, 4294963200, 11456, 1, 4294963200, 11456, 1, 4294963200, 11456, 1, 4294963200, 11456, 1, 4294963200, 11456, 1, 4294963200, 11456, 1, 4294963200, 11456, 1, 4294963200, 11456, 1, 4294963200, 11456, 1, 4294963200, 11456, 1, 4294963200, 11456, 1, 4294963200, 11456, 1, 4294963200, 11456, 1, 4294963200, 
11456, 1, 4294963200, 11456, 1, 4294963200, 11456, 1, 4294963200, 11456, 1, 4294963200, 11456, 1, 4294963200, 11456, 1, 4294963200, 17984, 16777216, 4096, 17984, 16777216, 4096, 17988, 16777216, 4096, 17988, 16777216, 4096, 17992, 16777216, 4096, 17992, 16777216, 4096, 18000, 16777216, 4096, 18000, 16777216, 4096, 18004, 16777216, 4096, 18004, 16777216, 4096, 18008, 16777216, 4096, 18008, 16777216, 4096, 19584, 0, 4096, 20800, 64, 0, 21376, 4195328, 67125252, 21376, 4195328, 67125252, 21376, 4195328, 67125252, 21376, 4195328, 67125252, 21376, 4195328, 67125252, 21696, 67125252, 1074004032, 21696, 67125252, 1074004032, 21696, 67125252, 1074004032, 21696, 67125252, 1074004032, 21696, 67125252, 1074004032, 21696, 67125252, 1074004032, 27600, 2048, 0, 27616, 2048, 0, 28752, 2048, 134217728, 28752, 2048, 134217728, 28768, 2048, 134217728, 28768, 2048, 134217728] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470219399902087_934_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470219399902087_934_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..50ee7f8b --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470219399902087_934_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,354 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 52))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 42))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (57 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 4))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = 
ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 36))) { + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (149 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 62)) { + if ((WaveGetLaneIndex() >= 54)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (159 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 46)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (173 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (178 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (185 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 17)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((218 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 35)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((234 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 32)) { + if ((WaveGetLaneIndex() >= 59)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((244 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 52)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((253 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 50)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((260 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + if ((counter0 == 2)) { + break; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 9))) { + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (302 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 46))) { + if (((WaveGetLaneIndex() < 11) || (WaveGetLaneIndex() >= 57))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((345 << 6) | (i2 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (352 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 378 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1472, 9, 2453667840, 1472, 9, 2453667840, 1472, 9, 2453667840, 1472, 9, 2453667840, 1472, 9, 2453667840, 1472, 9, 2453667840, 2432, 0, 8192, 3648, 134217792, 0, 3648, 134217792, 0, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 7936, 1363481681, 340870420, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 
4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 8256, 4294967295, 4294967295, 13968, 74898, 0, 13968, 74898, 0, 13968, 74898, 0, 13968, 74898, 0, 13968, 74898, 0, 13968, 74898, 0, 13984, 74898, 0, 13984, 74898, 0, 13984, 74898, 0, 13984, 74898, 0, 13984, 74898, 0, 13984, 74898, 0, 15636, 0, 536870912, 15640, 0, 536870912, 15652, 0, 536870912, 15656, 0, 536870912, 16212, 0, 613416960, 16212, 0, 613416960, 16212, 0, 613416960, 16212, 0, 613416960, 16216, 0, 613416960, 16216, 0, 613416960, 16216, 0, 613416960, 16216, 0, 613416960, 16228, 0, 613416960, 16228, 0, 613416960, 16228, 
0, 613416960, 16228, 0, 613416960, 16232, 0, 613416960, 16232, 0, 613416960, 16232, 0, 613416960, 16232, 0, 613416960] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470234948800439_935_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470234948800439_935_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ff9c4a17 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470234948800439_935_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,343 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 36))) { + if ((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 
49)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (43 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 18) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((75 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, 
temp); + _participant_bit[temp] = ((104 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 2)) { + break; + } + } + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (133 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (143 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (152 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (157 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = 
(result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (172 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (181 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 47))) { + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 40))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + 
uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((230 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (239 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if (((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((265 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 2)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (291 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (301 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 147 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 8512, 9362, 613416960, 8512, 9362, 613416960, 8512, 9362, 613416960, 8512, 9362, 613416960, 8512, 9362, 613416960, 8512, 9362, 613416960, 8512, 9362, 613416960, 8512, 9362, 613416960, 8512, 9362, 613416960, 9728, 4260880, 68174084, 9728, 4260880, 68174084, 9728, 4260880, 68174084, 9728, 4260880, 68174084, 9728, 4260880, 68174084, 9728, 4260880, 68174084, 9728, 4260880, 68174084, 9728, 4260880, 68174084, 9728, 4260880, 68174084, 11008, 146, 0, 11008, 146, 0, 11008, 146, 0, 11328, 613566756, 1227133513, 11328, 613566756, 1227133513, 11328, 613566756, 1227133513, 11328, 613566756, 1227133513, 11328, 613566756, 1227133513, 11328, 613566756, 1227133513, 11328, 613566756, 1227133513, 11328, 613566756, 1227133513, 11328, 613566756, 1227133513, 11328, 613566756, 1227133513, 11328, 613566756, 1227133513, 11328, 613566756, 1227133513, 11328, 613566756, 1227133513, 11328, 613566756, 1227133513, 11328, 613566756, 1227133513, 11328, 613566756, 1227133513, 11328, 613566756, 1227133513, 11328, 613566756, 1227133513, 11328, 613566756, 1227133513, 11328, 613566756, 
1227133513, 11328, 613566756, 1227133513, 19264, 85, 0, 19264, 85, 0, 19264, 85, 0, 19264, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470360878505138_937_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470360878505138_937_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..8bcb821f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470360878505138_937_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,219 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 29))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((37 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + if ((i1 == 2)) { + break; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((62 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 57))) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (80 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + 
} + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((144 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 62))) { + if (((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 40))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((166 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((173 << 6) | (i3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i4 == 2)) { + break; + } 
+ } + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((183 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 174 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5120, 16, 0, 6336, 1048832, 16781313, 6336, 1048832, 16781313, 6336, 1048832, 16781313, 6336, 1048832, 16781313, 6336, 1048832, 16781313, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 7488, 1145324612, 1145324612, 11072, 0, 2290649088, 11072, 0, 2290649088, 11072, 0, 2290649088, 11072, 0, 2290649088, 11072, 0, 2290649088, 11072, 0, 2290649088, 11076, 0, 2290649088, 11076, 0, 2290649088, 11076, 0, 2290649088, 11076, 0, 2290649088, 11076, 0, 2290649088, 11076, 0, 2290649088, 11080, 0, 2290649088, 11080, 0, 2290649088, 11080, 0, 2290649088, 11080, 0, 2290649088, 11080, 0, 2290649088, 11080, 0, 2290649088, 11088, 0, 2290649088, 11088, 0, 2290649088, 11088, 0, 2290649088, 11088, 0, 2290649088, 11088, 0, 2290649088, 11088, 0, 2290649088, 11092, 0, 2290649088, 11092, 0, 2290649088, 11092, 0, 2290649088, 11092, 0, 2290649088, 11092, 0, 2290649088, 11092, 0, 2290649088, 11096, 0, 2290649088, 11096, 0, 2290649088, 11096, 0, 2290649088, 11096, 0, 
2290649088, 11096, 0, 2290649088, 11096, 0, 2290649088] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470552764029220_940_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470552764029220_940_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..4bb76cfb --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470552764029220_940_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,123 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 22)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((24 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (39 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 93 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1536, 4194304, 0, 1552, 4194304, 0, 1568, 4194304, 0, 1856, 613566756, 1227133513, 1856, 613566756, 1227133513, 1856, 613566756, 1227133513, 1856, 613566756, 1227133513, 1856, 613566756, 1227133513, 1856, 613566756, 1227133513, 1856, 613566756, 1227133513, 1856, 613566756, 1227133513, 1856, 613566756, 1227133513, 1856, 613566756, 1227133513, 1856, 613566756, 1227133513, 1856, 613566756, 1227133513, 1856, 613566756, 1227133513, 1856, 613566756, 1227133513, 1856, 613566756, 1227133513, 1856, 613566756, 1227133513, 1856, 613566756, 1227133513, 1856, 
613566756, 1227133513, 1856, 613566756, 1227133513, 1856, 613566756, 1227133513, 1856, 613566756, 1227133513, 2496, 85, 0, 2496, 85, 0, 2496, 85, 0, 2496, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470554117078503_941_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470554117078503_941_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..9c35aca2 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470554117078503_941_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,314 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); 
+ uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 36)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() >= 39)) { + if ((WaveGetLaneIndex() < 7)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 50))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((80 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 23))) { + if 
((((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (106 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (129 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (138 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 62)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 1)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 58))) 
{ + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 23)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (211 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (224 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (233 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (240 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 49)) { + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (250 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (254 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 174 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 2704, 0, 1227133504, 2704, 0, 1227133504, 2704, 
0, 1227133504, 2704, 0, 1227133504, 2704, 0, 1227133504, 2704, 0, 1227133504, 2704, 0, 1227133504, 2704, 0, 1227133504, 2704, 0, 1227133504, 2708, 0, 1227133504, 2708, 0, 1227133504, 2708, 0, 1227133504, 2708, 0, 1227133504, 2708, 0, 1227133504, 2708, 0, 1227133504, 2708, 0, 1227133504, 2708, 0, 1227133504, 2708, 0, 1227133504, 2720, 0, 1227133504, 2720, 0, 1227133504, 2720, 0, 1227133504, 2720, 0, 1227133504, 2720, 0, 1227133504, 2720, 0, 1227133504, 2720, 0, 1227133504, 2720, 0, 1227133504, 2720, 0, 1227133504, 2724, 0, 1227133504, 2724, 0, 1227133504, 2724, 0, 1227133504, 2724, 0, 1227133504, 2724, 0, 1227133504, 2724, 0, 1227133504, 2724, 0, 1227133504, 2724, 0, 1227133504, 2724, 0, 1227133504, 5136, 0, 272629760, 5136, 0, 272629760, 5152, 0, 272629760, 5152, 0, 272629760, 8576, 0, 1090785280, 8576, 0, 1090785280, 8576, 0, 1090785280, 8576, 0, 1090785280, 9280, 0, 1073741824] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470596941513181_944_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470596941513181_944_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..c1c656ae --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470596941513181_944_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,169 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; 
+ } + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 57))) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((72 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 10)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((87 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((98 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 56))) { + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((116 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((129 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((138 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((153 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 141 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 2960, 0, 64, 2976, 0, 64, 7440, 0, 1224736768, 7440, 0, 1224736768, 7440, 0, 1224736768, 7456, 0, 1224736768, 7456, 0, 1224736768, 7456, 0, 1224736768, 8272, 4, 1073741824, 8272, 4, 1073741824, 8288, 4, 1073741824, 8288, 4, 1073741824, 8848, 1065220, 1090785345, 8848, 1065220, 1090785345, 8848, 1065220, 1090785345, 8848, 1065220, 1090785345, 8848, 1065220, 1090785345, 8848, 1065220, 1090785345, 8848, 1065220, 1090785345, 8848, 1065220, 1090785345, 8848, 1065220, 1090785345, 8848, 1065220, 1090785345, 8864, 1065220, 1090785345, 8864, 
1065220, 1090785345, 8864, 1065220, 1090785345, 8864, 1065220, 1090785345, 8864, 1065220, 1090785345, 8864, 1065220, 1090785345, 8864, 1065220, 1090785345, 8864, 1065220, 1090785345, 8864, 1065220, 1090785345, 8864, 1065220, 1090785345, 9808, 0, 1073741824, 9824, 0, 1073741824] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470598491213478_945_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470598491213478_945_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..d190f469 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470598491213478_945_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,80 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 35)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 59)) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1024, 0, 8, 1040, 0, 8, 1472, 0, 134217728, 1488, 0, 134217728] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470599449745723_946_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470599449745723_946_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..618f03b2 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470599449745723_946_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,232 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 36))) { + if (((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 55)) { + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (29 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() == 41)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + break; + } + } + } + if ((((WaveGetLaneIndex() == 
0) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 62))) { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 47))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 8)) { + if ((WaveGetLaneIndex() >= 40)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 13)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (116 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 14))) { + if ((((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((181 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((190 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + default: { 
+ result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 240 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [4096, 0, 16, 5632, 65, 0, 5632, 65, 0, 6336, 65, 0, 6336, 65, 0, 7424, 16, 0, 7744, 68174084, 1090785345, 7744, 68174084, 1090785345, 7744, 68174084, 1090785345, 7744, 68174084, 1090785345, 7744, 68174084, 1090785345, 7744, 68174084, 1090785345, 7744, 68174084, 1090785345, 7744, 68174084, 1090785345, 7744, 68174084, 1090785345, 7744, 68174084, 1090785345, 7744, 68174084, 1090785345, 8384, 73, 0, 8384, 73, 0, 8384, 73, 0, 11600, 545392672, 136348168, 11600, 545392672, 136348168, 11600, 545392672, 136348168, 11600, 545392672, 136348168, 11600, 545392672, 136348168, 11600, 545392672, 136348168, 11600, 545392672, 136348168, 11600, 545392672, 136348168, 11600, 545392672, 136348168, 11600, 545392672, 136348168, 11616, 545392672, 136348168, 11616, 545392672, 136348168, 11616, 545392672, 136348168, 11616, 545392672, 136348168, 11616, 545392672, 136348168, 11616, 545392672, 136348168, 11616, 545392672, 136348168, 11616, 545392672, 136348168, 11616, 545392672, 136348168, 11616, 545392672, 136348168, 11632, 545392672, 136348168, 11632, 545392672, 136348168, 11632, 545392672, 136348168, 11632, 545392672, 136348168, 11632, 545392672, 136348168, 11632, 545392672, 136348168, 11632, 545392672, 136348168, 11632, 545392672, 136348168, 11632, 545392672, 136348168, 11632, 545392672, 136348168, 12176, 545392672, 136348168, 12176, 545392672, 136348168, 12176, 545392672, 136348168, 12176, 545392672, 136348168, 12176, 
545392672, 136348168, 12176, 545392672, 136348168, 12176, 545392672, 136348168, 12176, 545392672, 136348168, 12176, 545392672, 136348168, 12176, 545392672, 136348168, 12192, 545392672, 136348168, 12192, 545392672, 136348168, 12192, 545392672, 136348168, 12192, 545392672, 136348168, 12192, 545392672, 136348168, 12192, 545392672, 136348168, 12192, 545392672, 136348168, 12192, 545392672, 136348168, 12192, 545392672, 136348168, 12192, 545392672, 136348168, 12208, 545392672, 136348168, 12208, 545392672, 136348168, 12208, 545392672, 136348168, 12208, 545392672, 136348168, 12208, 545392672, 136348168, 12208, 545392672, 136348168, 12208, 545392672, 136348168, 12208, 545392672, 136348168, 12208, 545392672, 136348168, 12208, 545392672, 136348168] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470602409257732_947_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470602409257732_947_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..6042a759 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470602409257732_947_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,417 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() == 30) || (WaveGetLaneIndex() == 32))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((26 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 9))) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((74 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((110 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((121 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 36)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(((136 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 27)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((143 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 28)) { + if ((WaveGetLaneIndex() == 17)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (175 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } else { + if ((WaveGetLaneIndex() == 60)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (182 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 
2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (192 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (205 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() == 8)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (212 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 27)) { + if ((WaveGetLaneIndex() >= 48)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((242 << 6) | (counter2 << 4)); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 57)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (251 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (261 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (274 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() < 12)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (281 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (286 << 6); + uint4 ballot = 
WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (293 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (303 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter3 = 0; + while ((counter3 < 3)) { + counter3 = (counter3 + 1); + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((325 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (330 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (337 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- 
+Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 309 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [11648, 0, 268435456, 12288, 17, 0, 12288, 17, 0, 13568, 256, 0, 15504, 139810, 0, 15504, 139810, 0, 15504, 139810, 0, 15504, 139810, 0, 15504, 139810, 0, 15520, 139810, 0, 15520, 139810, 0, 15520, 139810, 0, 15520, 139810, 0, 15520, 139810, 0, 18304, 1145324612, 1145324612, 18304, 1145324612, 1145324612, 18304, 1145324612, 1145324612, 18304, 1145324612, 1145324612, 18304, 1145324612, 1145324612, 18304, 1145324612, 1145324612, 18304, 1145324612, 1145324612, 18304, 1145324612, 1145324612, 18304, 1145324612, 1145324612, 18304, 1145324612, 1145324612, 18304, 1145324612, 1145324612, 18304, 1145324612, 1145324612, 18304, 1145324612, 1145324612, 18304, 1145324612, 1145324612, 18304, 1145324612, 1145324612, 18304, 1145324612, 1145324612, 18752, 559240, 0, 18752, 559240, 0, 18752, 559240, 0, 18752, 559240, 0, 18752, 559240, 0, 19392, 17, 0, 19392, 17, 0, 20816, 0, 572661760, 20816, 0, 572661760, 20816, 0, 572661760, 20816, 0, 572661760, 20816, 0, 572661760, 20820, 0, 572661760, 20820, 0, 572661760, 20820, 0, 572661760, 20820, 0, 572661760, 20820, 0, 572661760, 20824, 0, 572661760, 20824, 0, 572661760, 20824, 0, 572661760, 20824, 0, 572661760, 20824, 0, 572661760, 20832, 0, 572661760, 20832, 0, 572661760, 20832, 0, 572661760, 20832, 0, 572661760, 20832, 0, 572661760, 20836, 0, 572661760, 20836, 0, 572661760, 20836, 0, 572661760, 20836, 0, 572661760, 20836, 0, 572661760, 20840, 0, 572661760, 20840, 0, 572661760, 20840, 0, 572661760, 20840, 0, 572661760, 20840, 0, 572661760, 20848, 0, 572661760, 20848, 0, 572661760, 20848, 0, 572661760, 20848, 0, 572661760, 20848, 0, 572661760, 20852, 0, 572661760, 20852, 0, 572661760, 20852, 0, 572661760, 20852, 0, 572661760, 20852, 0, 572661760, 20856, 0, 572661760, 20856, 0, 572661760, 
20856, 0, 572661760, 20856, 0, 572661760, 20856, 0, 572661760, 21120, 1145324612, 1145324612, 21120, 1145324612, 1145324612, 21120, 1145324612, 1145324612, 21120, 1145324612, 1145324612, 21120, 1145324612, 1145324612, 21120, 1145324612, 1145324612, 21120, 1145324612, 1145324612, 21120, 1145324612, 1145324612, 21120, 1145324612, 1145324612, 21120, 1145324612, 1145324612, 21120, 1145324612, 1145324612, 21120, 1145324612, 1145324612, 21120, 1145324612, 1145324612, 21120, 1145324612, 1145324612, 21120, 1145324612, 1145324612, 21120, 1145324612, 1145324612, 21568, 559240, 0, 21568, 559240, 0, 21568, 559240, 0, 21568, 559240, 0, 21568, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470630533232488_948_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470630533232488_948_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..07435109 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470630533232488_948_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,478 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 1))) { + if (((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 47))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 43))) { + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 58))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (91 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (102 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (117 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (121 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (144 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (161 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 51))) { + if ((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 42))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((205 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if (((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 44))) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((267 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((284 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((293 << 6) | (i1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((307 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 21) || 
(WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((318 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 58))) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (348 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (358 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (367 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (372 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 36))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (387 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (391 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (401 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() >= 60)) { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if ((WaveGetLaneIndex() >= 49)) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((422 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 9)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((429 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (436 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((455 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 44))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((466 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (479 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((495 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((504 << 6) | (counter5 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (509 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (514 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 303 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [13136, 0, 524288, 13152, 0, 524288, 13168, 0, 524288, 14656, 0, 4194304, 18176, 8192, 0, 18180, 8192, 0, 18192, 8192, 0, 18196, 8192, 0, 18208, 8192, 0, 18212, 8192, 0, 18752, 8192, 0, 18756, 8192, 0, 18768, 8192, 0, 18772, 8192, 0, 18784, 8192, 0, 18788, 8192, 0, 9216, 0, 131072, 10304, 136348168, 2181570690, 10304, 136348168, 2181570690, 10304, 136348168, 2181570690, 10304, 136348168, 2181570690, 10304, 136348168, 2181570690, 10304, 136348168, 2181570690, 10304, 136348168, 2181570690, 10304, 136348168, 2181570690, 10304, 136348168, 2181570690, 10304, 136348168, 2181570690, 10304, 136348168, 2181570690, 20352, 599186, 613566464, 20352, 599186, 613566464, 20352, 599186, 613566464, 20352, 599186, 613566464, 20352, 599186, 613566464, 20352, 599186, 613566464, 20352, 599186, 613566464, 20352, 599186, 613566464, 20352, 599186, 613566464, 20352, 599186, 613566464, 20352, 599186, 613566464, 20352, 599186, 613566464, 20352, 599186, 613566464, 20352, 599186, 613566464, 20368, 599186, 
613566464, 20368, 599186, 613566464, 20368, 599186, 613566464, 20368, 599186, 613566464, 20368, 599186, 613566464, 20368, 599186, 613566464, 20368, 599186, 613566464, 20368, 599186, 613566464, 20368, 599186, 613566464, 20368, 599186, 613566464, 20368, 599186, 613566464, 20368, 599186, 613566464, 20368, 599186, 613566464, 20368, 599186, 613566464, 20384, 599186, 613566464, 20384, 599186, 613566464, 20384, 599186, 613566464, 20384, 599186, 613566464, 20384, 599186, 613566464, 20384, 599186, 613566464, 20384, 599186, 613566464, 20384, 599186, 613566464, 20384, 599186, 613566464, 20384, 599186, 613566464, 20384, 599186, 613566464, 20384, 599186, 613566464, 20384, 599186, 613566464, 20384, 599186, 613566464, 25664, 85, 0, 25664, 85, 0, 25664, 85, 0, 25664, 85, 0, 27008, 0, 2147483648, 27024, 0, 2147483648, 27040, 0, 2147483648, 29120, 8, 34078720, 29120, 8, 34078720, 29120, 8, 34078720, 29136, 8, 34078720, 29136, 8, 34078720, 29136, 8, 34078720, 29824, 8, 34086912, 29824, 8, 34086912, 29824, 8, 34086912, 29824, 8, 34086912, 29840, 8, 34086912, 29840, 8, 34086912, 29840, 8, 34086912, 29840, 8, 34086912, 32896, 545392672, 136348168, 32896, 545392672, 136348168, 32896, 545392672, 136348168, 32896, 545392672, 136348168, 32896, 545392672, 136348168, 32896, 545392672, 136348168, 32896, 545392672, 136348168, 32896, 545392672, 136348168, 32896, 545392672, 136348168, 32896, 545392672, 136348168] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470665976577094_949_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470665976577094_949_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..cf26a838 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470665976577094_949_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,455 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 11)) { + if ((WaveGetLaneIndex() == 29)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((16 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 33))) { + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 50))) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((42 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 42)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((49 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() == 63)) { + if ((WaveGetLaneIndex() == 43)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 57)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 7)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((84 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if ((WaveGetLaneIndex() == 6)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (94 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (113 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (127 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (137 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (151 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = 
(result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (167 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((186 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((197 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (202 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (216 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 38))) { + if (((((WaveGetLaneIndex() == 1) || 
(WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (253 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 4))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (268 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (277 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((WaveGetLaneIndex() == 54)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (287 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 61))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (298 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (309 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + } else { + for (uint i3 = 0; (i3 < 3); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 53))) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((335 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((344 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 52))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((363 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (377 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((393 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 43))) { + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((411 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 42)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((418 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((425 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (434 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 360 + - Name: expected_bit_patterns + Format: UInt32 + 
Stride: 4 + Data: [11904, 32, 2097152, 11904, 32, 2097152, 11920, 32, 2097152, 11920, 32, 2097152, 11936, 32, 2097152, 11936, 32, 2097152, 12608, 32, 2097152, 12608, 32, 2097152, 12624, 32, 2097152, 12624, 32, 2097152, 12640, 32, 2097152, 12640, 32, 2097152, 12928, 1145324612, 1145324612, 12928, 1145324612, 1145324612, 12928, 1145324612, 1145324612, 12928, 1145324612, 1145324612, 12928, 1145324612, 1145324612, 12928, 1145324612, 1145324612, 12928, 1145324612, 1145324612, 12928, 1145324612, 1145324612, 12928, 1145324612, 1145324612, 12928, 1145324612, 1145324612, 12928, 1145324612, 1145324612, 12928, 1145324612, 1145324612, 12928, 1145324612, 1145324612, 12928, 1145324612, 1145324612, 12928, 1145324612, 1145324612, 12928, 1145324612, 1145324612, 21440, 8, 2281701376, 21440, 8, 2281701376, 21440, 8, 2281701376, 21456, 8, 2281701376, 21456, 8, 2281701376, 21456, 8, 2281701376, 21472, 8, 2281701376, 21472, 8, 2281701376, 21472, 8, 2281701376, 22016, 128, 0, 22032, 128, 0, 22048, 128, 0, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25168, 2863311530, 2863311530, 25184, 
2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 25184, 2863311530, 2863311530, 26320, 10, 2862612480, 26320, 10, 2862612480, 26320, 10, 2862612480, 26320, 10, 2862612480, 26320, 10, 2862612480, 26320, 10, 2862612480, 26320, 10, 2862612480, 26320, 10, 2862612480, 26336, 10, 2862612480, 26336, 10, 2862612480, 26336, 10, 2862612480, 26336, 10, 2862612480, 26336, 10, 2862612480, 26336, 10, 2862612480, 26336, 10, 2862612480, 26336, 10, 2862612480] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470681749337651_950_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470681749337651_950_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b613b5fb --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470681749337651_950_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,139 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 39))) { + if ((((((WaveGetLaneIndex() == 10) || 
(WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (65 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 35)) { + if ((WaveGetLaneIndex() == 5)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (100 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (104 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 57 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 17, 0, 576, 17, 0, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 
1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 1472, 1145324612, 1145324612, 6400, 8, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470682172578508_951_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470682172578508_951_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3f84f65c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470682172578508_951_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,110 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 55))) { + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 15) || (WaveGetLaneIndex() == 39))) { + if (((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (37 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((51 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((58 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (77 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 75 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 
1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080, 1216, 131071, 4278190080] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470682454897424_952_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470682454897424_952_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..af28e28f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470682454897424_952_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,302 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if ((WaveGetLaneIndex() >= 45)) { + if ((WaveGetLaneIndex() < 29)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (19 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if ((WaveGetLaneIndex() >= 46)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((34 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 47)) { + if ((WaveGetLaneIndex() == 53)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((46 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((57 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() < 32)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (66 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (75 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 20))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((102 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 60)) || (WaveGetLaneIndex() == 27))) { + result = (result + WaveActiveMax(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((125 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (146 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (165 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (179 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 
0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (188 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (195 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if ((((WaveGetLaneIndex() == 22) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((239 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 51))) { + if (((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 57)) || 
(WaveGetLaneIndex() == 18))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((273 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((292 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((i3 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 9))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((314 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 777 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2176, 0, 3067822080, 2176, 0, 3067822080, 2176, 0, 3067822080, 2176, 0, 3067822080, 2176, 0, 3067822080, 2176, 0, 3067822080, 2176, 0, 3067822080, 2176, 0, 3067822080, 2176, 0, 3067822080, 2176, 0, 3067822080, 2176, 0, 3067822080, 2176, 0, 3067822080, 2192, 0, 3067822080, 2192, 0, 3067822080, 2192, 0, 3067822080, 2192, 0, 3067822080, 2192, 0, 3067822080, 2192, 0, 3067822080, 2192, 0, 3067822080, 2192, 0, 3067822080, 2192, 0, 3067822080, 2192, 0, 
3067822080, 2192, 0, 3067822080, 2192, 0, 3067822080, 2208, 0, 3067822080, 2208, 0, 3067822080, 2208, 0, 3067822080, 2208, 0, 3067822080, 2208, 0, 3067822080, 2208, 0, 3067822080, 2208, 0, 3067822080, 2208, 0, 3067822080, 2208, 0, 3067822080, 2208, 0, 3067822080, 2208, 0, 3067822080, 2208, 0, 3067822080, 3648, 0, 3067084800, 3648, 0, 3067084800, 3648, 0, 3067084800, 3648, 0, 3067084800, 3648, 0, 3067084800, 3648, 0, 3067084800, 3648, 0, 3067084800, 3648, 0, 3067084800, 3664, 0, 3067084800, 3664, 0, 3067084800, 3664, 0, 3067084800, 3664, 0, 3067084800, 3664, 0, 3067084800, 3664, 0, 3067084800, 3664, 0, 3067084800, 3664, 0, 3067084800, 3680, 0, 3067084800, 3680, 0, 3067084800, 3680, 0, 3067084800, 3680, 0, 3067084800, 3680, 0, 3067084800, 3680, 0, 3067084800, 3680, 0, 3067084800, 3680, 0, 3067084800, 4800, 1363481681, 1300, 4800, 1363481681, 1300, 4800, 1363481681, 1300, 4800, 1363481681, 1300, 4800, 1363481681, 1300, 4800, 1363481681, 1300, 4800, 1363481681, 1300, 4800, 1363481681, 1300, 4800, 1363481681, 1300, 4800, 1363481681, 1300, 4800, 1363481681, 1300, 4800, 1363481681, 1300, 4800, 1363481681, 1300, 4800, 1363481681, 1300, 4800, 1363481681, 1300, 6528, 2147483649, 0, 6528, 2147483649, 0, 6544, 2147483649, 0, 6544, 2147483649, 0, 6560, 2147483649, 0, 6560, 2147483649, 0, 8000, 167772160, 32, 8000, 167772160, 32, 8000, 167772160, 32, 8016, 167772160, 32, 8016, 167772160, 32, 8016, 167772160, 32, 8032, 167772160, 32, 8032, 167772160, 32, 8032, 167772160, 32, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 
8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 8320, 4294967295, 4294967295, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 
1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9344, 1431655765, 1431655765, 9984, 17, 0, 9984, 17, 0, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 11456, 1145324612, 1145324612, 12032, 1145324612, 1145324612, 12032, 1145324612, 1145324612, 12032, 1145324612, 1145324612, 12032, 1145324612, 1145324612, 12032, 1145324612, 1145324612, 12032, 1145324612, 1145324612, 12032, 1145324612, 1145324612, 12032, 1145324612, 1145324612, 12032, 1145324612, 1145324612, 12032, 1145324612, 1145324612, 12032, 1145324612, 1145324612, 12032, 1145324612, 1145324612, 12032, 1145324612, 1145324612, 12032, 1145324612, 1145324612, 12032, 1145324612, 1145324612, 12032, 1145324612, 1145324612, 15296, 4210688, 2097152, 15296, 4210688, 2097152, 15296, 4210688, 2097152, 15300, 4210688, 2097152, 15300, 4210688, 2097152, 15300, 4210688, 2097152, 15312, 4210688, 2097152, 15312, 4210688, 2097152, 15312, 4210688, 2097152, 15316, 4210688, 2097152, 15316, 4210688, 2097152, 15316, 4210688, 2097152, 15328, 4210688, 2097152, 15328, 4210688, 2097152, 15328, 4210688, 2097152, 15332, 4210688, 2097152, 15332, 4210688, 2097152, 15332, 4210688, 2097152, 18688, 0, 67108864, 18692, 0, 67108864, 18704, 0, 67108864, 18708, 0, 67108864, 18720, 0, 67108864, 18724, 0, 67108864, 20096, 16777729, 65536, 20096, 
16777729, 65536, 20096, 16777729, 65536, 20096, 16777729, 65536, 20112, 16777729, 65536, 20112, 16777729, 65536, 20112, 16777729, 65536, 20112, 16777729, 65536, 20128, 16777729, 65536, 20128, 16777729, 65536, 20128, 16777729, 65536, 20128, 16777729, 65536] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756470966404144537_956_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756470966404144537_956_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b78233fd --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756470966404144537_956_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,110 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if 
((WaveGetLaneIndex() == 60)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 60))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 37)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 90 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 2704, 170, 2684354560, 2704, 170, 2684354560, 2704, 170, 2684354560, 2704, 170, 2684354560, 2704, 170, 2684354560, 2704, 170, 2684354560, 2708, 170, 2684354560, 2708, 170, 2684354560, 2708, 170, 2684354560, 2708, 170, 2684354560, 2708, 170, 2684354560, 2708, 170, 2684354560, 2720, 170, 2684354560, 2720, 170, 2684354560, 2720, 170, 2684354560, 2720, 170, 2684354560, 2720, 170, 2684354560, 2720, 170, 2684354560, 2724, 170, 2684354560, 2724, 170, 2684354560, 2724, 170, 2684354560, 2724, 170, 2684354560, 2724, 170, 
2684354560, 2724, 170, 2684354560, 3344, 0, 32, 3360, 0, 32] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471044697544559_958_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471044697544559_958_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ef27c57a --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471044697544559_958_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,124 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 
2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((39 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (48 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (53 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 237 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 
1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 1152, 1431655765, 1431655765, 2512, 73, 2449473536, 2512, 73, 2449473536, 2512, 73, 2449473536, 2512, 73, 2449473536, 2512, 73, 2449473536, 2512, 73, 2449473536, 2528, 73, 2449473536, 2528, 73, 2449473536, 2528, 73, 2449473536, 2528, 73, 2449473536, 2528, 73, 2449473536, 2528, 73, 2449473536, 3072, 272696336, 68174084, 3072, 272696336, 68174084, 3072, 272696336, 68174084, 3072, 272696336, 68174084, 3072, 272696336, 68174084, 3072, 272696336, 68174084, 3072, 272696336, 68174084, 3072, 272696336, 68174084, 3072, 272696336, 68174084, 3072, 272696336, 68174084, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513, 3392, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: 
+ Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471045803805000_959_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471045803805000_959_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..fa6f95f2 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471045803805000_959_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,158 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((33 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((43 << 6) | (counter0 
<< 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((56 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(9)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((65 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + break; + } + } + if ((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 56))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((83 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- 
+Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 348 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 2128, 272696336, 68174084, 2128, 272696336, 68174084, 2128, 272696336, 68174084, 2128, 272696336, 68174084, 2128, 272696336, 68174084, 2128, 272696336, 68174084, 2128, 272696336, 68174084, 2128, 272696336, 68174084, 2128, 272696336, 68174084, 2128, 272696336, 68174084, 2132, 272696336, 68174084, 2132, 272696336, 68174084, 2132, 272696336, 68174084, 2132, 272696336, 68174084, 2132, 272696336, 68174084, 2132, 272696336, 68174084, 2132, 272696336, 68174084, 2132, 272696336, 68174084, 2132, 272696336, 68174084, 2132, 272696336, 68174084, 2144, 272696336, 68174084, 2144, 272696336, 68174084, 2144, 272696336, 68174084, 2144, 272696336, 68174084, 2144, 272696336, 68174084, 2144, 272696336, 68174084, 2144, 272696336, 68174084, 2144, 272696336, 68174084, 2144, 272696336, 68174084, 2144, 272696336, 68174084, 2148, 272696336, 68174084, 2148, 272696336, 68174084, 2148, 272696336, 68174084, 2148, 272696336, 68174084, 2148, 272696336, 68174084, 2148, 272696336, 68174084, 2148, 272696336, 68174084, 2148, 272696336, 68174084, 2148, 272696336, 68174084, 2148, 272696336, 68174084, 2768, 16, 0, 2772, 16, 0, 2784, 16, 0, 2788, 16, 0, 4176, 2181570690, 545392672, 4176, 2181570690, 545392672, 4176, 2181570690, 545392672, 4176, 2181570690, 545392672, 4176, 2181570690, 545392672, 4176, 2181570690, 545392672, 4176, 2181570690, 545392672, 4176, 2181570690, 545392672, 4176, 2181570690, 545392672, 4176, 2181570690, 545392672, 4176, 2181570690, 545392672, 4180, 2181570690, 545392672, 4180, 2181570690, 545392672, 4180, 2181570690, 545392672, 4180, 2181570690, 545392672, 4180, 2181570690, 545392672, 4180, 2181570690, 545392672, 4180, 2181570690, 545392672, 4180, 2181570690, 545392672, 4180, 2181570690, 
545392672, 4180, 2181570690, 545392672, 4180, 2181570690, 545392672, 4192, 2181570690, 545392672, 4192, 2181570690, 545392672, 4192, 2181570690, 545392672, 4192, 2181570690, 545392672, 4192, 2181570690, 545392672, 4192, 2181570690, 545392672, 4192, 2181570690, 545392672, 4192, 2181570690, 545392672, 4192, 2181570690, 545392672, 4192, 2181570690, 545392672, 4192, 2181570690, 545392672, 4196, 2181570690, 545392672, 4196, 2181570690, 545392672, 4196, 2181570690, 545392672, 4196, 2181570690, 545392672, 4196, 2181570690, 545392672, 4196, 2181570690, 545392672, 4196, 2181570690, 545392672, 4196, 2181570690, 545392672, 4196, 2181570690, 545392672, 4196, 2181570690, 545392672, 4196, 2181570690, 545392672, 5328, 33554434, 0, 5328, 33554434, 0, 5344, 33554434, 0, 5344, 33554434, 0, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513, 5632, 613566756, 1227133513] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471050496577783_960_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471050496577783_960_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ece1a276 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471050496577783_960_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,107 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 44)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((23 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((37 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((WaveGetLaneIndex() >= 37)) { + result = (result + 
WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((44 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 426 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1488, 0, 4096, 1504, 0, 4096, 1520, 0, 4096, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2384, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 
2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2400, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2416, 2863311530, 2863311530, 2832, 0, 1431655744, 2832, 0, 1431655744, 2832, 0, 1431655744, 2832, 0, 1431655744, 2832, 0, 1431655744, 2832, 0, 1431655744, 2832, 0, 1431655744, 2832, 0, 1431655744, 2832, 0, 1431655744, 2832, 0, 1431655744, 2832, 0, 1431655744, 2832, 0, 1431655744, 2832, 0, 1431655744, 2848, 0, 1431655744, 2848, 0, 1431655744, 2848, 0, 1431655744, 2848, 0, 1431655744, 2848, 0, 1431655744, 2848, 0, 1431655744, 2848, 0, 1431655744, 2848, 0, 
1431655744, 2848, 0, 1431655744, 2848, 0, 1431655744, 2848, 0, 1431655744, 2848, 0, 1431655744, 2848, 0, 1431655744, 2864, 0, 1431655744, 2864, 0, 1431655744, 2864, 0, 1431655744, 2864, 0, 1431655744, 2864, 0, 1431655744, 2864, 0, 1431655744, 2864, 0, 1431655744, 2864, 0, 1431655744, 2864, 0, 1431655744, 2864, 0, 1431655744, 2864, 0, 1431655744, 2864, 0, 1431655744, 2864, 0, 1431655744] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471056757549769_961_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471056757549769_961_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..e78b913e --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471056757549769_961_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,164 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp 
+ 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((WaveGetLaneIndex() >= 59)) { + if ((WaveGetLaneIndex() >= 36)) { + result = (result + WaveActiveSum(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 23)) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (41 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (55 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (71 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 61))) { + if (((WaveGetLaneIndex() < 20) || (WaveGetLaneIndex() >= 46))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (89 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 129 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1792, 0, 4160749568, 1792, 0, 4160749568, 1792, 0, 4160749568, 1792, 0, 4160749568, 1792, 0, 4160749568, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 
4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 4544, 2863311530, 44739242, 5696, 31, 0, 5696, 31, 0, 5696, 31, 0, 5696, 31, 0, 5696, 31, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471095070135631_966_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471095070135631_966_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..1a5bd5bc --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471095070135631_966_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,348 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 2)) { + if ((WaveGetLaneIndex() >= 63)) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (12 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (36 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 9))) { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 55))) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (70 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 57))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (87 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (96 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (101 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 10)) { + if ((WaveGetLaneIndex() < 10)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (124 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((151 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() < 2)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (158 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (177 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (187 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (196 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (201 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (208 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (217 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((WaveGetLaneIndex() < 9) || (WaveGetLaneIndex() >= 52))) { + if (((WaveGetLaneIndex() < 3) || (WaveGetLaneIndex() >= 49))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (235 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 45))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((259 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 49))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (270 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (279 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 46)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (314 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() >= 63)) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 52)) || (WaveGetLaneIndex() == 19))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (306 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 264 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2304, 0, 4194304, 5568, 0, 33554432, 6144, 272696336, 68174084, 6144, 272696336, 68174084, 6144, 272696336, 68174084, 6144, 272696336, 68174084, 6144, 272696336, 68174084, 6144, 272696336, 68174084, 6144, 272696336, 68174084, 6144, 272696336, 68174084, 6144, 272696336, 68174084, 6144, 272696336, 68174084, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 1227133513, 6464, 613566756, 
1227133513, 7104, 73, 0, 7104, 73, 0, 7104, 73, 0, 13888, 272696336, 68174084, 13888, 272696336, 68174084, 13888, 272696336, 68174084, 13888, 272696336, 68174084, 13888, 272696336, 68174084, 13888, 272696336, 68174084, 13888, 272696336, 68174084, 13888, 272696336, 68174084, 13888, 272696336, 68174084, 13888, 272696336, 68174084, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 17600, 613566756, 1227133513, 20096, 0, 4294950912, 20096, 0, 4294950912, 20096, 0, 4294950912, 20096, 0, 4294950912, 20096, 0, 4294950912, 20096, 0, 4294950912, 20096, 0, 4294950912, 20096, 0, 4294950912, 20096, 0, 4294950912, 20096, 0, 4294950912, 20096, 0, 4294950912, 20096, 0, 4294950912, 20096, 0, 4294950912, 20096, 0, 4294950912, 20096, 0, 4294950912, 20096, 0, 4294950912, 20096, 0, 4294950912, 20096, 0, 4294950912, 19584, 17301505, 0, 19584, 17301505, 0, 19584, 17301505, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471192870312510_968_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471192870312510_968_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..16ed98f8 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471192870312510_968_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,184 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((30 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 41))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((52 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((61 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (68 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + uint counter2 = 0; + while ((counter2 < 3)) { + counter2 = (counter2 + 1); + if ((WaveGetLaneIndex() < 4)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((87 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((117 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 63))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((132 << 6) | (counter2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() >= 43)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((139 << 6) | (counter2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 270 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 
1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 1936, 1431655765, 1431655765, 5584, 5, 0, 5584, 5, 0, 5600, 5, 0, 5600, 5, 0, 5616, 5, 0, 5616, 5, 0, 7508, 5, 0, 7508, 5, 0, 7512, 5, 0, 7512, 5, 0, 7524, 5, 0, 7524, 5, 0, 7528, 5, 0, 7528, 5, 0, 7540, 5, 0, 7540, 5, 0, 7544, 5, 0, 7544, 5, 0, 8468, 0, 16, 8472, 0, 16, 8484, 0, 16, 8488, 0, 16, 8500, 0, 16, 8504, 0, 16, 8912, 0, 1431654400, 8912, 0, 1431654400, 8912, 0, 1431654400, 8912, 0, 1431654400, 8912, 0, 1431654400, 8912, 0, 1431654400, 8912, 0, 1431654400, 8912, 0, 1431654400, 8912, 0, 1431654400, 8912, 0, 1431654400, 8928, 0, 1431654400, 8928, 0, 1431654400, 8928, 0, 1431654400, 8928, 0, 1431654400, 8928, 0, 1431654400, 8928, 0, 1431654400, 8928, 0, 1431654400, 8928, 0, 1431654400, 8928, 0, 1431654400, 8928, 0, 1431654400, 8944, 0, 1431654400, 8944, 0, 1431654400, 8944, 0, 1431654400, 8944, 0, 1431654400, 8944, 0, 1431654400, 8944, 0, 1431654400, 8944, 0, 1431654400, 8944, 0, 1431654400, 8944, 0, 1431654400, 8944, 0, 1431654400] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471203083968212_969_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471203083968212_969_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b33c2cf9 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471203083968212_969_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,132 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 10))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (62 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((WaveGetLaneIndex() < 3)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (56 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveSum(4)); + uint 
temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (46 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (72 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (81 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 204 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [3968, 1040, 1088, 3968, 1040, 1088, 3968, 1040, 1088, 3968, 1040, 1088, 3584, 7, 0, 3584, 7, 0, 3584, 7, 0, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 
1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 3200, 1431654720, 1431654677, 2944, 2048, 2048, 2944, 2048, 2048, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 2688, 2863309480, 2863309482, 4608, 85, 0, 4608, 85, 0, 4608, 85, 0, 4608, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471204888946648_970_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471204888946648_970_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..ddc2b2dc --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471204888946648_970_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,95 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (((WaveGetLaneIndex() == 29) || (WaveGetLaneIndex() == 49))) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 47))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main 
+ DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 72 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471205406750793_971_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471205406750793_971_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..7b419ada --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471205406750793_971_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,203 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 55)) || (WaveGetLaneIndex() == 6))) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 57))) { + if ((((WaveGetLaneIndex() == 14) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 44))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 47)) || 
(WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((93 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() >= 47)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((107 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 52)) { + result = (result + WaveActiveMax(5)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((114 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((134 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((161 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp 
+ 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 7))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((180 << 6) | (i2 << 4)) | (i3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + } + } + case 1: { + if ((WaveGetLaneIndex() < 26)) { + if ((WaveGetLaneIndex() >= 35)) { + result = (result + WaveActiveSum(8)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (193 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 52))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((214 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 43))) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((225 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i4 == 1)) { + continue; + } + if ((i4 == 1)) { + break; + } + } + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (236 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 414 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [8576, 136348168, 2181570690, 8576, 136348168, 2181570690, 8576, 136348168, 2181570690, 8576, 136348168, 2181570690, 8576, 136348168, 2181570690, 8576, 136348168, 2181570690, 8576, 136348168, 2181570690, 8576, 136348168, 2181570690, 8576, 136348168, 2181570690, 8576, 136348168, 2181570690, 8576, 136348168, 2181570690, 8592, 136348168, 2181570690, 8592, 136348168, 2181570690, 8592, 136348168, 2181570690, 8592, 136348168, 2181570690, 8592, 136348168, 2181570690, 8592, 136348168, 2181570690, 8592, 136348168, 2181570690, 8592, 136348168, 2181570690, 8592, 136348168, 2181570690, 8592, 136348168, 2181570690, 8592, 136348168, 2181570690, 10304, 1, 4194304, 10304, 1, 4194304, 10308, 1, 4194304, 10308, 1, 4194304, 10320, 1, 4194304, 10320, 1, 4194304, 10324, 1, 4194304, 10324, 1, 4194304, 11520, 0, 2, 11524, 0, 2, 11536, 0, 2, 11540, 0, 2, 13696, 14043, 0, 13696, 14043, 0, 13696, 14043, 0, 13696, 14043, 0, 13696, 14043, 0, 13696, 14043, 0, 13696, 14043, 0, 13696, 14043, 0, 13696, 14043, 0, 13696, 14043, 0, 13712, 14043, 0, 13712, 14043, 0, 13712, 14043, 0, 13712, 14043, 0, 13712, 14043, 0, 13712, 14043, 0, 13712, 14043, 0, 13712, 14043, 0, 13712, 14043, 0, 13712, 14043, 0, 14400, 14043, 0, 14400, 14043, 0, 14400, 14043, 0, 14400, 14043, 0, 14400, 14043, 0, 14400, 14043, 0, 14400, 14043, 0, 14400, 14043, 0, 14400, 14043, 0, 14400, 14043, 0, 14416, 14043, 0, 14416, 14043, 0, 14416, 14043, 0, 14416, 14043, 0, 14416, 14043, 0, 14416, 14043, 0, 14416, 14043, 0, 14416, 14043, 0, 14416, 14043, 0, 14416, 14043, 0, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 
15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295, 15104, 4294967295, 4294967295] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: 
BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471237493555628_973_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471237493555628_973_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..972ba9cc --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471237493555628_973_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,220 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (33 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (42 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (47 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (54 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (58 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (69 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((86 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 60))) { + if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((112 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(7)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((127 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 51)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((134 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 16) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((157 << 6) | (i0 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if (((WaveGetLaneIndex() & 
1) == 0)) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((166 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i0 == 1)) { + continue; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 432 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5504, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 
2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 5520, 2863311530, 2863311530, 10048, 1, 8192, 10048, 1, 8192, 10052, 1, 8192, 10052, 1, 8192, 10056, 1, 8192, 10056, 1, 8192, 10064, 1, 8192, 10064, 1, 8192, 10068, 1, 8192, 10068, 1, 8192, 10072, 1, 8192, 10072, 1, 8192, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10624, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 
10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765, 10640, 1431655765, 1431655765] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471278564133241_976_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471278564133241_976_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..840014bf --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471278564133241_976_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,229 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if ((WaveGetLaneIndex() < 12)) { + if ((WaveGetLaneIndex() >= 43)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (11 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (21 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (30 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 48)) || 
(WaveGetLaneIndex() == 2))) { + uint counter0 = 0; + while ((counter0 < 3)) { + counter0 = (counter0 + 1); + if ((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 34))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((73 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 27)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 61)) || (WaveGetLaneIndex() == 36))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (98 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (105 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (115 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (125 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 
1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (134 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (139 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((((WaveGetLaneIndex() == 19) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((163 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter2 = 0; + while ((counter2 < 2)) { + counter2 = (counter2 + 1); + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 60))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((185 << 6) | (counter1 << 4)) | (counter2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter2 == 1)) { + break; + } + } + if (((WaveGetLaneIndex() == 25) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((199 << 6) 
| (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 87 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1344, 17, 0, 1344, 17, 0, 6720, 559240, 0, 6720, 559240, 0, 6720, 559240, 0, 6720, 559240, 0, 6720, 559240, 0, 7360, 73, 0, 7360, 73, 0, 7360, 73, 0, 8576, 272696336, 68174084, 8576, 272696336, 68174084, 8576, 272696336, 68174084, 8576, 272696336, 68174084, 8576, 272696336, 68174084, 8576, 272696336, 68174084, 8576, 272696336, 68174084, 8576, 272696336, 68174084, 8576, 272696336, 68174084, 8576, 272696336, 68174084, 10448, 0, 2097152, 10464, 0, 2097152, 10480, 0, 2097152, 11860, 67108864, 0, 11876, 67108864, 0, 11892, 67108864, 0, 12752, 0, 8, 12768, 0, 8, 12784, 0, 8] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471488220025541_982_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471488220025541_982_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b323eeb3 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471488220025541_982_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,364 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + if (((WaveGetLaneIndex() < 12) || (WaveGetLaneIndex() >= 60))) { + if (((WaveGetLaneIndex() < 4) || (WaveGetLaneIndex() >= 48))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (17 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if ((WaveGetLaneIndex() >= 42)) { + if ((WaveGetLaneIndex() >= 32)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((42 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 14)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((49 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() == 4)) { + if ((WaveGetLaneIndex() == 58)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() < 18)) { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((94 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 51))) { + if ((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 17))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((120 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 4) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 50)) || (WaveGetLaneIndex() == 14))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = ((143 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((154 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + } + if ((WaveGetLaneIndex() == 60)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if (((WaveGetLaneIndex() < 19) || (WaveGetLaneIndex() >= 46))) { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (183 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() & 1) == 0)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 25)) || (WaveGetLaneIndex() == 46)) || (WaveGetLaneIndex() == 30))) { + result = (result + WaveActiveSum(result)); + uint temp 
= 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((226 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 54))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((241 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (245 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + } + if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 56))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (258 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + for (uint i4 = 0; (i4 < 2); i4 = (i4 + 1)) { + for (uint i5 = 0; (i5 < 2); i5 = (i5 + 1)) { + if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((292 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 23)) || (WaveGetLaneIndex() == 39)) || (WaveGetLaneIndex() == 53))) { + result = (result + 
WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((311 << 6) | (i4 << 4)) | (i5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 16))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((326 << 6) | (i4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (335 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (340 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 49))) { + if (((WaveGetLaneIndex() < 13) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (358 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i6 = 0; (i6 < 2); i6 = (i6 + 1)) { + if ((WaveGetLaneIndex() == 48)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((373 << 6) | (i6 << 
4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i6 == 1)) { + break; + } + } + } else { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (385 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter7 = 0; + while ((counter7 < 2)) { + counter7 = (counter7 + 1); + if ((((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 54)) || (WaveGetLaneIndex() == 28))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((415 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 20)) || (WaveGetLaneIndex() == 35)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 23))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((438 << 6) | (counter7 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter7 == 1)) { + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 279 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1088, 15, 4026531840, 1088, 15, 4026531840, 1088, 15, 4026531840, 1088, 15, 4026531840, 1088, 15, 4026531840, 1088, 15, 4026531840, 1088, 15, 4026531840, 
1088, 15, 4026531840, 2704, 0, 4026531840, 2704, 0, 4026531840, 2704, 0, 4026531840, 2704, 0, 4026531840, 2708, 0, 4026531840, 2708, 0, 4026531840, 2708, 0, 4026531840, 2708, 0, 4026531840, 2720, 0, 4026531840, 2720, 0, 4026531840, 2720, 0, 4026531840, 2720, 0, 4026531840, 2724, 0, 4026531840, 2724, 0, 4026531840, 2724, 0, 4026531840, 2724, 0, 4026531840, 4112, 4, 0, 4128, 4, 0, 9856, 16, 0, 9872, 16, 0, 9888, 16, 0, 11712, 73, 0, 11712, 73, 0, 11712, 73, 0, 12736, 66560, 68173824, 12736, 66560, 68173824, 12736, 66560, 68173824, 12736, 66560, 68173824, 12736, 66560, 68173824, 14464, 4, 0, 14480, 4, 0, 15424, 4, 0, 15440, 4, 0, 16512, 47, 4278190080, 16512, 47, 4278190080, 16512, 47, 4278190080, 16512, 47, 4278190080, 16512, 47, 4278190080, 16512, 47, 4278190080, 16512, 47, 4278190080, 16512, 47, 4278190080, 16512, 47, 4278190080, 16512, 47, 4278190080, 16512, 47, 4278190080, 16512, 47, 4278190080, 16512, 47, 4278190080, 19904, 256, 0, 19908, 256, 0, 19920, 256, 0, 19924, 256, 0, 20864, 65536, 0, 20880, 65536, 0, 21760, 1145324612, 1145324612, 21760, 1145324612, 1145324612, 21760, 1145324612, 1145324612, 21760, 1145324612, 1145324612, 21760, 1145324612, 1145324612, 21760, 1145324612, 1145324612, 21760, 1145324612, 1145324612, 21760, 1145324612, 1145324612, 21760, 1145324612, 1145324612, 21760, 1145324612, 1145324612, 21760, 1145324612, 1145324612, 21760, 1145324612, 1145324612, 21760, 1145324612, 1145324612, 21760, 1145324612, 1145324612, 21760, 1145324612, 1145324612, 21760, 1145324612, 1145324612, 22912, 8, 2290089984, 22912, 8, 2290089984, 22912, 8, 2290089984, 22912, 8, 2290089984, 24640, 2290649216, 34952, 24640, 2290649216, 34952, 24640, 2290649216, 34952, 24640, 2290649216, 34952, 24640, 2290649216, 34952, 24640, 2290649216, 34952, 24640, 2290649216, 34952, 24640, 2290649216, 34952, 24640, 2290649216, 34952, 24640, 2290649216, 34952, 24640, 2290649216, 34952, 28048, 8388608, 8, 28048, 8388608, 8] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: 
[0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471552595067844_983_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471552595067844_983_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..188b00e2 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471552595067844_983_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,79 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() < 14)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((13 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() >= 33)) { + result = (result + WaveActiveMax(10)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((20 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: 
Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 270 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [848, 16383, 0, 848, 16383, 0, 848, 16383, 0, 848, 16383, 0, 848, 16383, 0, 848, 16383, 0, 848, 16383, 0, 848, 16383, 0, 848, 16383, 0, 848, 16383, 0, 848, 16383, 0, 848, 16383, 0, 848, 16383, 0, 848, 16383, 0, 864, 16383, 0, 864, 16383, 0, 864, 16383, 0, 864, 16383, 0, 864, 16383, 0, 864, 16383, 0, 864, 16383, 0, 864, 16383, 0, 864, 16383, 0, 864, 16383, 0, 864, 16383, 0, 864, 16383, 0, 864, 16383, 0, 864, 16383, 0, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1296, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294, 1312, 0, 4294967294] + - Name: _wave_op_index + Format: 
UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471571747622385_985_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471571747622385_985_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..b3182e5c --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471571747622385_985_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,83 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: 
Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 12 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 85, 0, 576, 85, 0, 576, 85, 0, 576, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471571954747621_986_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471571954747621_986_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..0e28e090 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471571954747621_986_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,156 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((19 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if 
((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 45))) { + if (((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 56))) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 62))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((52 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((71 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() < 7) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMin(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((90 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 21) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((101 << 6) | (i0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((WaveGetLaneIndex() == 11)) { + result = (result + WaveActiveMax(6)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((108 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((117 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (126 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (130 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 348 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5760, 85, 1430257664, 5760, 85, 1430257664, 5760, 85, 1430257664, 5760, 85, 1430257664, 5760, 85, 1430257664, 5760, 85, 1430257664, 5760, 85, 1430257664, 5760, 85, 1430257664, 5760, 85, 1430257664, 5764, 85, 1430257664, 5764, 85, 1430257664, 5764, 85, 1430257664, 5764, 85, 1430257664, 5764, 85, 1430257664, 5764, 85, 1430257664, 5764, 85, 1430257664, 5764, 85, 1430257664, 5764, 85, 1430257664, 5776, 85, 1430257664, 5776, 85, 1430257664, 5776, 85, 1430257664, 5776, 85, 1430257664, 5776, 85, 1430257664, 5776, 85, 1430257664, 5776, 85, 1430257664, 5776, 85, 1430257664, 5776, 85, 1430257664, 5780, 85, 1430257664, 5780, 85, 
1430257664, 5780, 85, 1430257664, 5780, 85, 1430257664, 5780, 85, 1430257664, 5780, 85, 1430257664, 5780, 85, 1430257664, 5780, 85, 1430257664, 5780, 85, 1430257664, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6464, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6468, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6480, 1398101, 1431650304, 6484, 1398101, 1431650304, 6484, 1398101, 1431650304, 6484, 1398101, 1431650304, 6484, 1398101, 1431650304, 6484, 1398101, 1431650304, 6484, 1398101, 1431650304, 6484, 1398101, 1431650304, 6484, 1398101, 
1431650304, 6484, 1398101, 1431650304, 6484, 1398101, 1431650304, 6484, 1398101, 1431650304, 6484, 1398101, 1431650304, 6484, 1398101, 1431650304, 6484, 1398101, 1431650304, 6484, 1398101, 1431650304, 6484, 1398101, 1431650304, 6484, 1398101, 1431650304, 6484, 1398101, 1431650304, 6484, 1398101, 1431650304, 6484, 1398101, 1431650304] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471614726534185_988_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471614726534185_988_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3d0c86f8 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471614726534185_988_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,282 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveMin(1)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((36 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((46 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((55 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((60 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((67 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((71 << 6) | (counter0 << 4)) | (i1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + 
_participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((i1 == 1)) { + break; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((106 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if (((((WaveGetLaneIndex() == 17) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 58)) || (WaveGetLaneIndex() == 44))) { + if ((((WaveGetLaneIndex() == 31) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 46))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((143 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((152 << 6) | (i2 << 4)) | (counter3 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (156 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (166 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() < 17)) { + uint counter4 = 0; + while ((counter4 < 2)) { + counter4 = (counter4 + 1); + if (((WaveGetLaneIndex() < 17) || (WaveGetLaneIndex() >= 49))) { + if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 53))) { + result = (result + WaveActiveMax(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((194 << 6) | (counter4 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (199 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (209 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); 
+ _participant_bit[temp] = (218 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (222 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 414 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [2320, 16777216, 0, 2324, 16777216, 0, 2336, 16777216, 0, 2340, 16777216, 0, 2960, 1, 0, 2964, 1, 0, 2976, 1, 0, 2980, 1, 0, 3856, 1074004032, 4195328, 3856, 1074004032, 4195328, 3856, 1074004032, 4195328, 3856, 1074004032, 4195328, 3856, 1074004032, 4195328, 3860, 1074004032, 4195328, 3860, 1074004032, 4195328, 3860, 1074004032, 4195328, 3860, 1074004032, 4195328, 3860, 1074004032, 4195328, 3872, 1074004032, 4195328, 3872, 1074004032, 4195328, 3872, 1074004032, 4195328, 3872, 1074004032, 4195328, 3872, 1074004032, 4195328, 3876, 1074004032, 4195328, 3876, 1074004032, 4195328, 3876, 1074004032, 4195328, 3876, 1074004032, 4195328, 3876, 1074004032, 4195328, 4304, 32776, 0, 4304, 32776, 0, 4308, 32776, 0, 4308, 32776, 0, 4320, 32776, 0, 4320, 32776, 0, 4324, 32776, 0, 4324, 32776, 0, 5312, 272696336, 68174084, 5312, 272696336, 68174084, 5312, 272696336, 68174084, 5312, 272696336, 68174084, 5312, 272696336, 68174084, 5312, 272696336, 68174084, 5312, 272696336, 68174084, 5312, 272696336, 68174084, 5312, 272696336, 68174084, 5312, 272696336, 68174084, 9732, 545392672, 136348168, 9732, 545392672, 136348168, 9732, 545392672, 136348168, 9732, 545392672, 136348168, 9732, 545392672, 136348168, 9732, 545392672, 136348168, 
9732, 545392672, 136348168, 9732, 545392672, 136348168, 9732, 545392672, 136348168, 9732, 545392672, 136348168, 9736, 545392672, 136348168, 9736, 545392672, 136348168, 9736, 545392672, 136348168, 9736, 545392672, 136348168, 9736, 545392672, 136348168, 9736, 545392672, 136348168, 9736, 545392672, 136348168, 9736, 545392672, 136348168, 9736, 545392672, 136348168, 9736, 545392672, 136348168, 9748, 545392672, 136348168, 9748, 545392672, 136348168, 9748, 545392672, 136348168, 9748, 545392672, 136348168, 9748, 545392672, 136348168, 9748, 545392672, 136348168, 9748, 545392672, 136348168, 9748, 545392672, 136348168, 9748, 545392672, 136348168, 9748, 545392672, 136348168, 9752, 545392672, 136348168, 9752, 545392672, 136348168, 9752, 545392672, 136348168, 9752, 545392672, 136348168, 9752, 545392672, 136348168, 9752, 545392672, 136348168, 9752, 545392672, 136348168, 9752, 545392672, 136348168, 9752, 545392672, 136348168, 9752, 545392672, 136348168, 9764, 545392672, 136348168, 9764, 545392672, 136348168, 9764, 545392672, 136348168, 9764, 545392672, 136348168, 9764, 545392672, 136348168, 9764, 545392672, 136348168, 9764, 545392672, 136348168, 9764, 545392672, 136348168, 9764, 545392672, 136348168, 9764, 545392672, 136348168, 9768, 545392672, 136348168, 9768, 545392672, 136348168, 9768, 545392672, 136348168, 9768, 545392672, 136348168, 9768, 545392672, 136348168, 9768, 545392672, 136348168, 9768, 545392672, 136348168, 9768, 545392672, 136348168, 9768, 545392672, 136348168, 9768, 545392672, 136348168, 10624, 73, 0, 10624, 73, 0, 10624, 73, 0, 12432, 18, 0, 12432, 18, 0, 12448, 18, 0, 12448, 18, 0, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 
12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 12736, 613566756, 1227133513, 13376, 85, 0, 13376, 85, 0, 13376, 85, 0, 13376, 85, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471769045903795_989_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471769045903795_989_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..3a76fc90 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471769045903795_989_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,93 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = 
(result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (23 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 264 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1152, 1363481681, 340870420, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 
4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295, 1472, 4294967295, 4294967295] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471786394724291_991_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471786394724291_991_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..949a5173 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471786394724291_991_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,171 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 30)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 40))) { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() == 4)) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((31 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 1)) { + if ((WaveGetLaneIndex() == 41)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((WaveGetLaneIndex() == 63)) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((50 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 31)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 63)) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveSum(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (73 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (((((WaveGetLaneIndex() == 0) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 57)) || (WaveGetLaneIndex() == 1))) { + if ((((((WaveGetLaneIndex() == 2) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 53)) || (WaveGetLaneIndex() == 0))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if ((WaveGetLaneIndex() == 42)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((136 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 3); i2 = (i2 + 1)) { + if ((((WaveGetLaneIndex() == 19) || 
(WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 35))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((159 << 6) | (counter1 << 4)) | (i2 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i2 == 1)) { + continue; + } + } + if ((WaveGetLaneIndex() == 48)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((169 << 6) | (counter1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter1 == 2)) { + break; + } + } + } + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 30 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [5248, 272696336, 68174084, 5248, 272696336, 68174084, 5248, 272696336, 68174084, 5248, 272696336, 68174084, 5248, 272696336, 68174084, 5248, 272696336, 68174084, 5248, 272696336, 68174084, 5248, 272696336, 68174084, 5248, 272696336, 68174084, 5248, 272696336, 68174084] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471829367616611_993_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471829367616611_993_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..82b8822f --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471829367616611_993_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,380 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if (((((WaveGetLaneIndex() == 8) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 44))) { + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 48)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (40 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (50 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (59 << 6); + uint4 ballot = WaveActiveBallot(1); + 
_participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (63 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((WaveGetLaneIndex() == 27) || (WaveGetLaneIndex() == 43)) || (WaveGetLaneIndex() == 2))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (78 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (93 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (103 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (112 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + } + break; + } + case 1: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + 
WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (131 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (136 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (141 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (148 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) { + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 14) || (WaveGetLaneIndex() >= 54))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((180 << 6) | 
(i0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (189 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (194 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 36)) { + if ((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 49))) { + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 58))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (229 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((246 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 51))) { + result = (result + WaveActiveMin(8)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (265 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } else { + if ((WaveGetLaneIndex() < 18)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (272 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter3 = 0; + while ((counter3 < 2)) { + counter3 = (counter3 + 1); + if ((WaveGetLaneIndex() < 24)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((286 << 6) | (counter3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i4 = 0; (i4 < 3); i4 = (i4 + 1)) { + if ((WaveGetLaneIndex() == 13)) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((301 << 6) | (counter3 << 4)) | (i4 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 49)) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (310 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (315 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 
3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (322 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (326 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 297 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [6592, 17, 0, 6592, 17, 0, 8704, 537002016, 2097664, 8704, 537002016, 2097664, 8704, 537002016, 2097664, 8704, 537002016, 2097664, 8704, 537002016, 2097664, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9024, 1145324612, 1145324612, 9472, 838860, 0, 9472, 838860, 0, 9472, 838860, 0, 9472, 838860, 0, 9472, 838860, 0, 9472, 838860, 0, 9472, 838860, 0, 9472, 838860, 0, 9472, 838860, 0, 9472, 838860, 0, 11524, 4097, 268435456, 11524, 4097, 268435456, 11524, 4097, 268435456, 11528, 4097, 268435456, 11528, 4097, 268435456, 11528, 4097, 268435456, 11540, 4097, 268435456, 11540, 4097, 268435456, 11540, 4097, 268435456, 11544, 4097, 268435456, 11544, 4097, 268435456, 11544, 4097, 
268435456, 12096, 268501008, 1048832, 12096, 268501008, 1048832, 12096, 268501008, 1048832, 12096, 268501008, 1048832, 12096, 268501008, 1048832, 12416, 1048832, 16781313, 12416, 1048832, 16781313, 12416, 1048832, 16781313, 12416, 1048832, 16781313, 12416, 1048832, 16781313, 17408, 139810, 0, 17408, 139810, 0, 17408, 139810, 0, 17408, 139810, 0, 17408, 139810, 0, 18320, 2236962, 0, 18320, 2236962, 0, 18320, 2236962, 0, 18320, 2236962, 0, 18320, 2236962, 0, 18320, 2236962, 0, 18336, 2236962, 0, 18336, 2236962, 0, 18336, 2236962, 0, 18336, 2236962, 0, 18336, 2236962, 0, 18336, 2236962, 0, 19280, 8192, 0, 19284, 8192, 0, 19288, 8192, 0, 19296, 8192, 0, 19300, 8192, 0, 19304, 8192, 0, 20160, 1145324612, 1145324612, 20160, 1145324612, 1145324612, 20160, 1145324612, 1145324612, 20160, 1145324612, 1145324612, 20160, 1145324612, 1145324612, 20160, 1145324612, 1145324612, 20160, 1145324612, 1145324612, 20160, 1145324612, 1145324612, 20160, 1145324612, 1145324612, 20160, 1145324612, 1145324612, 20160, 1145324612, 1145324612, 20160, 1145324612, 1145324612, 20160, 1145324612, 1145324612, 20160, 1145324612, 1145324612, 20160, 1145324612, 1145324612, 20160, 1145324612, 1145324612, 20608, 559240, 0, 20608, 559240, 0, 20608, 559240, 0, 20608, 559240, 0, 20608, 559240, 0] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize64BitTracking/tests/program_1756471848274040230_994_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471848274040230_994_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..94e8521b
--- /dev/null
+++ b/test/WaveSize64BitTracking/tests/program_1756471848274040230_994_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,100 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-word records (id word, ballot.x, ballot.y) appended by each wave-op site below
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the shared write cursor into _participant_bit
+
+[numthreads(64, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // tid is never read; every branch keys off WaveGetLaneIndex()
+  uint result = 0;
+  if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 53))) { // only lanes 0-1 and 53 and up get past this guard
+    if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 44))) { // already implied by the outer guard
+      result = (result + WaveActiveMin(9));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // reserve 3 words; temp receives the cursor value from before the add
+      _participant_bit[temp] = (17 << 6); // id word: site number 17 placed at bit 6 and up
+      uint4 ballot = WaveActiveBallot(1); // bit mask of the lanes executing this site together
+      _participant_bit[(temp + 1)] = ballot.x; // mask bits for lanes 0-31
+      _participant_bit[(temp + 2)] = ballot.y; // mask bits for lanes 32-63
+    }
+    for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) {
+      if ((WaveGetLaneIndex() == 20)) { // never true here: lane 20 is rejected by the outer guard
+        result = (result + WaveActiveMax((WaveGetLaneIndex() + 2)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((34 << 6) | (i0 << 4)); // id word also carries the loop counter i0 starting at bit 4
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if ((WaveGetLaneIndex() == 40)) { // never true here: lane 40 is rejected by the outer guard
+        result = (result + WaveActiveSum((WaveGetLaneIndex() + 5)));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = ((43 << 6) | (i0 << 4));
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if ((i0 == 1)) { // last statement of the loop body, so the continue changes nothing
+        continue;
+      }
+    }
+    if (((WaveGetLaneIndex() < 2) || (WaveGetLaneIndex() >= 44))) { // already implied by the outer guard
+      result = (result + WaveActiveMin(6));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (57 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 64 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 78 # 78 = 26 records x 3 words, the length of expected_bit_patterns below
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1088, 3, 4292870144, 1088, 3, 4292870144, 1088, 3, 4292870144, 1088, 3, 4292870144, 1088, 3, 4292870144, 1088, 3, 4292870144, 1088, 3, 4292870144, 1088, 3, 4292870144, 1088, 3, 4292870144, 1088, 3, 4292870144, 1088, 3, 4292870144, 1088, 3, 4292870144, 1088, 3, 4292870144, 3648, 3, 4292870144, 3648, 3, 4292870144, 3648, 3, 4292870144, 3648, 3, 4292870144, 3648, 3, 4292870144, 3648, 3, 4292870144, 3648, 3, 4292870144, 3648, 3, 4292870144, 3648, 3, 4292870144, 3648, 3, 4292870144, 3648, 3, 4292870144, 3648, 3, 4292870144, 3648, 3, 4292870144] # 13 records each for site 17 (1088 = 17 << 6) and site 57 (3648 = 57 << 6); mask words 3 and 4292870144 (0xFFE00000) select lanes 0-1 and 53-63
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize64BitTracking/tests/program_1756471848641877688_995_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471848641877688_995_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..eece3083
--- /dev/null
+++ b/test/WaveSize64BitTracking/tests/program_1756471848641877688_995_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,238 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-word records (id word, ballot.x, ballot.y) appended by each wave-op site below
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the shared write cursor into _participant_bit
+
+[numthreads(64, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // tid is never read; every branch keys off WaveGetLaneIndex()
+  uint result = 0;
+  if ((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 60))) {
+    result = (result + WaveActiveSum(1));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp); // reserve 3 words; temp receives the cursor value from before the add
+    _participant_bit[temp] = (81 << 6); // id word: site number 81 placed at bit 6 and up
+    uint4 ballot = WaveActiveBallot(1); // bit mask of the lanes executing this site together
+    _participant_bit[(temp + 1)] = ballot.x; // mask bits for lanes 0-31
+    _participant_bit[(temp + 2)] = ballot.y; // mask bits for lanes 32-63
+  } else {
+    if ((((((WaveGetLaneIndex() == 11) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 47)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 41))) {
+      result = (result + WaveActiveMin((WaveGetLaneIndex() + 2)));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (77 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    } else {
+      if ((((((WaveGetLaneIndex() == 12) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 49)) || (WaveGetLaneIndex() == 35))) { // lane 16 was already taken by the branch above
+        result = (result + WaveActiveMax(3));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (71 << 6);
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      } else {
+        if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 16)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 50))) { // lane 16 was already taken by an earlier branch
+          result = (result + WaveActiveSum(4));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (67 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+    }
+  }
+  switch ((WaveGetLaneIndex() % 3)) {
+  case 0: {
+    if ((WaveGetLaneIndex() < 8)) {
+      result = (result + WaveActiveSum(1));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (91 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  case 1: {
+    switch ((WaveGetLaneIndex() % 3)) { // same selector as the enclosing switch, so only case 1 below can run
+    case 0: {
+      if (((WaveGetLaneIndex() & 1) == 1)) {
+        if (((WaveGetLaneIndex() & 1) == 0)) { // contradicts the test one line up
+          result = (result + WaveActiveSum(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (108 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    case 1: {
+      if (((WaveGetLaneIndex() < 5) || (WaveGetLaneIndex() >= 52))) {
+        if (((WaveGetLaneIndex() < 6) || (WaveGetLaneIndex() >= 58))) {
+          result = (result + WaveActiveMax(WaveGetLaneIndex()));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (126 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      break;
+    }
+    case 2: {
+      switch ((WaveGetLaneIndex() % 3)) {
+      case 0: {
+        if ((WaveGetLaneIndex() < 8)) {
+          result = (result + WaveActiveSum(1));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (136 << 6);
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+        break;
+      }
+      case 1: {
+        if (((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 18)) || (WaveGetLaneIndex() == 33)) || (WaveGetLaneIndex() == 56))) {
+          if (((WaveGetLaneIndex() == 21) || (WaveGetLaneIndex() == 35))) {
+            result = (result + WaveActiveSum(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (162 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if (((WaveGetLaneIndex() == 20) || (WaveGetLaneIndex() == 61))) {
+            result = (result + WaveActiveSum(4));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (173 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        } else {
+          if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 26)) || (WaveGetLaneIndex() == 42)) || (WaveGetLaneIndex() == 58))) {
+            result = (result + WaveActiveMin(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (192 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+          if ((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 32)) || (WaveGetLaneIndex() == 42))) {
+            result = (result + WaveActiveMin((WaveGetLaneIndex() + 1)));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (209 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        break;
+      }
+      case 2: {
+        if (((WaveGetLaneIndex() & 1) == 0)) {
+          if (((WaveGetLaneIndex() & 1) == 0)) {
+            result = (result + WaveActiveSum(result));
+            uint temp = 0;
+            InterlockedAdd(_wave_op_index[0], 3, temp);
+            _participant_bit[temp] = (223 << 6);
+            uint4 ballot = WaveActiveBallot(1);
+            _participant_bit[(temp + 1)] = ballot.x;
+            _participant_bit[(temp + 2)] = ballot.y;
+          }
+        }
+        break;
+      }
+      }
+      break;
+    }
+    }
+    break;
+  }
+  case 2: {
+    if (true) {
+      result = (result + WaveActiveSum(3));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp);
+      _participant_bit[temp] = (228 << 6);
+      uint4 ballot = WaveActiveBallot(1);
+      _participant_bit[(temp + 1)] = ballot.x;
+      _participant_bit[(temp + 2)] = ballot.y;
+    }
+    break;
+  }
+  default: { // cases 0-2 above already cover every value of an unsigned x % 3
+    result = (result + WaveActiveSum(99));
+    uint temp = 0;
+    InterlockedAdd(_wave_op_index[0], 3, temp);
+    _participant_bit[temp] = (232 << 6);
+    uint4 ballot = WaveActiveBallot(1);
+    _participant_bit[(temp + 1)] = ballot.x;
+    _participant_bit[(temp + 2)] = ballot.y;
+    break;
+  }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 64 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 129 # 129 = 43 records x 3 words, the length of expected_bit_patterns below
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [5184, 8192, 268435520, 5184, 8192, 268435520, 5184, 8192, 268435520, 4928, 67584, 1073775104, 4928, 67584, 1073775104, 4928, 67584, 1073775104, 4928, 67584, 1073775104, 4928, 67584, 1073775104, 4544, 4096, 132104, 4544, 4096, 132104, 4544, 4096, 132104, 4544, 4096, 132104, 4288, 512, 262148, 4288, 512, 262148, 4288, 512, 262148, 5824, 73, 0, 5824, 73, 0, 5824, 73, 0, 8064, 18, 603979776, 8064, 18, 603979776, 8064, 18, 603979776, 8064, 18, 603979776, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 
1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513, 14592, 613566756, 1227133513]
+  - Name: _wave_op_index
+    Format: UInt32
+    Stride: 4
+    Data: [0]
+Results:
+  - Result: BitTrackingValidation
+    Rule: BufferParticipantPattern
+    GroupSize: 3
+    Actual: _participant_bit
+    Expected: expected_bit_patterns
+DescriptorSets:
+  - Resources:
+    - Name: _participant_bit
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+    - Name: _wave_op_index
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 1
+        Space: 0
+      VulkanBinding:
+        Binding: 1
+...
+#--- end
+
+# RUN: split-file %s %t
+# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl
+# RUN: %offloader %t/pipeline.yaml %t.o
diff --git a/test/WaveSize64BitTracking/tests/program_1756471865260593028_997_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471865260593028_997_increment_0_WaveParticipantBitTracking.test
new file mode 100644
index 00000000..90357fe3
--- /dev/null
+++ b/test/WaveSize64BitTracking/tests/program_1756471865260593028_997_increment_0_WaveParticipantBitTracking.test
@@ -0,0 +1,106 @@
+#--- source.hlsl
+RWStructuredBuffer<uint> _participant_bit : register(u0); // log of 3-word records (id word, ballot.x, ballot.y) appended by each wave-op site below
+RWStructuredBuffer<uint> _wave_op_index : register(u1); // element 0 is the shared write cursor into _participant_bit
+
+[numthreads(64, 1, 1)]
+void main(uint3 tid : SV_DispatchThreadID) { // tid is never read; every branch keys off WaveGetLaneIndex() or a loop counter
+  uint result = 0;
+  for (uint i0 = 0; (i0 < 2); i0 = (i0 + 1)) {
+    if (((WaveGetLaneIndex() & 1) == 1)) { // odd lanes only
+      result = (result + WaveActiveSum(result));
+      uint temp = 0;
+      InterlockedAdd(_wave_op_index[0], 3, temp); // reserve 3 words; temp receives the cursor value from before the add
+      _participant_bit[temp] = ((16 << 6) | (i0 << 4)); // id word: site number 16 at bit 6 and up, outer loop counter starting at bit 4
+      uint4 ballot = WaveActiveBallot(1); // bit mask of the lanes executing this site together
+      _participant_bit[(temp + 1)] = ballot.x; // mask bits for lanes 0-31
+      _participant_bit[(temp + 2)] = ballot.y; // mask bits for lanes 32-63
+    }
+    for (uint i1 = 0; (i1 < 3); i1 = (i1 + 1)) {
+      if (((WaveGetLaneIndex() & 1) == 1)) {
+        result = (result + WaveActiveMax(WaveGetLaneIndex()));
+        uint temp = 0;
+        InterlockedAdd(_wave_op_index[0], 3, temp);
+        _participant_bit[temp] = (((33 << 6) | (i0 << 4)) | (i1 << 2)); // inner loop counter i1 is added starting at bit 2
+        uint4 ballot = WaveActiveBallot(1);
+        _participant_bit[(temp + 1)] = ballot.x;
+        _participant_bit[(temp + 2)] = ballot.y;
+      }
+      if (((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 41)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 40))) {
+        if (((((WaveGetLaneIndex() == 13) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 59)) || (WaveGetLaneIndex() == 8))) { // shares no lane with the enclosing test, so this site never records
+          result = (result + WaveActiveMin((WaveGetLaneIndex() + 4)));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((69 << 6) | (i0 << 4)) | (i1 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      } else {
+        if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 42))) {
+          result = (result + WaveActiveMax(result));
+          uint temp = 0;
+          InterlockedAdd(_wave_op_index[0], 3, temp);
+          _participant_bit[temp] = (((80 << 6) | (i0 << 4)) | (i1 << 2));
+          uint4 ballot = WaveActiveBallot(1);
+          _participant_bit[(temp + 1)] = ballot.x;
+          _participant_bit[(temp + 2)] = ballot.y;
+        }
+      }
+      if ((i1 == 1)) { // last statement of the loop body, so the continue changes nothing
+        continue;
+      }
+    }
+    if ((i0 == 1)) { // i0 < 2 would end the loop after this iteration anyway
+      break;
+    }
+  }
+}
+
+#--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1] # Single dispatch for 64 threads
+Buffers:
+  - Name: _participant_bit
+    Format: UInt32
+    Stride: 4
+    Fill: 0
+    Size: 1416
+  - Name: expected_bit_patterns
+    Format: UInt32
+    Stride: 4
+    Data: [1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 
2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1024, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 1040, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 
2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2112, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2116, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 
2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2120, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2128, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 
2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2132, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 2136, 2863311530, 2863311530, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 
5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5120, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5124, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5128, 65533, 4278189056, 5136, 65533, 4278189056, 
5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5136, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5140, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 
5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056, 5144, 65533, 4278189056] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471925989303811_998_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471925989303811_998_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..2994eb86 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471925989303811_998_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,280 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (9 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (18 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (28 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if ((WaveGetLaneIndex() >= 34)) { + if ((WaveGetLaneIndex() >= 55)) { + result = (result + WaveActiveMin(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (38 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i0 = 0; (i0 < 3); i0 = (i0 + 1)) { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((55 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((64 << 6) | (i0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + case 2: { + if (((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 47))) { + if (((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 38))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (82 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i1 = 0; (i1 < 2); i1 = (i1 + 1)) { + if (((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMin(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((101 << 6) | (i1 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i1 == 1)) { + continue; + } + } + } else { + if ((WaveGetLaneIndex() == 3)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (111 << 6); + uint4 
ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i2 = 0; (i2 < 2); i2 = (i2 + 1)) { + if ((((((WaveGetLaneIndex() == 1) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 1))) { + result = (result + WaveActiveMin(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((142 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 37)) || (WaveGetLaneIndex() == 56)) || (WaveGetLaneIndex() == 8))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((167 << 6) | (i2 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + break; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if (((((WaveGetLaneIndex() == 5) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 59))) { + if (((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 29)) || (WaveGetLaneIndex() == 36)) || (WaveGetLaneIndex() == 61))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (204 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((((WaveGetLaneIndex() == 3) || (WaveGetLaneIndex() == 17)) || (WaveGetLaneIndex() == 45)) || (WaveGetLaneIndex() == 63))) { + if ((((WaveGetLaneIndex() == 9) || (WaveGetLaneIndex() == 21)) || (WaveGetLaneIndex() == 59))) { + result = (result + 
WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (234 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + for (uint i3 = 0; (i3 < 2); i3 = (i3 + 1)) { + if (((WaveGetLaneIndex() & 1) == 1)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((251 << 6) | (i3 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((i3 == 1)) { + break; + } + } + if (((((WaveGetLaneIndex() == 10) || (WaveGetLaneIndex() == 28)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 5))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (275 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (284 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (289 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + uint counter4 = 0; + while ((counter4 < 3)) { + counter4 = (counter4 + 1); + uint counter5 = 0; + while ((counter5 < 3)) { + counter5 = (counter5 + 1); + if ((WaveGetLaneIndex() >= 52)) { + result = (result + WaveActiveMax(3)); + uint temp = 0; + 
InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((310 << 6) | (counter4 << 4)) | (counter5 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 735 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [576, 73, 0, 576, 73, 0, 576, 73, 0, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 1152, 272696336, 68174084, 2432, 0, 612368384, 2432, 0, 612368384, 2432, 0, 612368384, 3520, 0, 68174084, 3520, 0, 68174084, 3520, 0, 68174084, 3520, 0, 68174084, 3520, 0, 68174084, 3536, 0, 68174084, 3536, 0, 68174084, 3536, 0, 68174084, 3536, 0, 68174084, 3536, 0, 68174084, 3552, 0, 68174084, 3552, 0, 68174084, 3552, 0, 68174084, 3552, 0, 68174084, 3552, 0, 68174084, 4096, 0, 545392672, 4096, 0, 545392672, 4096, 0, 545392672, 4096, 0, 545392672, 4096, 0, 545392672, 4112, 0, 545392672, 4112, 0, 545392672, 4112, 0, 545392672, 4112, 0, 545392672, 4112, 0, 545392672, 4128, 0, 545392672, 4128, 0, 545392672, 4128, 0, 545392672, 4128, 0, 545392672, 4128, 0, 545392672, 9088, 0, 1073741824, 9104, 0, 1073741824, 10688, 256, 16777216, 10688, 256, 16777216, 10704, 256, 16777216, 10704, 256, 16777216, 18176, 1363481681, 340870420, 18176, 1363481681, 340870420, 18176, 1363481681, 340870420, 18176, 1363481681, 340870420, 18176, 1363481681, 340870420, 18176, 1363481681, 340870420, 18176, 1363481681, 340870420, 18176, 1363481681, 340870420, 18176, 1363481681, 340870420, 18176, 1363481681, 340870420, 18176, 1363481681, 340870420, 18176, 1363481681, 340870420, 18176, 
1363481681, 340870420, 18176, 1363481681, 340870420, 18176, 1363481681, 340870420, 18176, 1363481681, 340870420, 18176, 1363481681, 340870420, 18176, 1363481681, 340870420, 18176, 1363481681, 340870420, 18176, 1363481681, 340870420, 18176, 1363481681, 340870420, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 
18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 18496, 4294967295, 4294967295, 19860, 0, 4293918720, 19860, 0, 4293918720, 19860, 0, 4293918720, 19860, 0, 4293918720, 19860, 0, 4293918720, 19860, 0, 4293918720, 19860, 0, 4293918720, 19860, 0, 4293918720, 19860, 0, 4293918720, 19860, 0, 4293918720, 19860, 0, 4293918720, 19860, 0, 4293918720, 19864, 0, 4293918720, 19864, 0, 4293918720, 19864, 0, 4293918720, 19864, 0, 4293918720, 19864, 0, 4293918720, 19864, 0, 4293918720, 19864, 0, 4293918720, 19864, 0, 4293918720, 19864, 0, 4293918720, 19864, 0, 4293918720, 19864, 0, 4293918720, 19864, 0, 4293918720, 19868, 0, 4293918720, 19868, 0, 4293918720, 19868, 0, 4293918720, 19868, 0, 4293918720, 19868, 0, 4293918720, 19868, 0, 4293918720, 19868, 0, 4293918720, 19868, 0, 4293918720, 19868, 0, 4293918720, 19868, 0, 4293918720, 19868, 0, 4293918720, 19868, 0, 4293918720, 19876, 0, 4293918720, 19876, 0, 4293918720, 19876, 0, 4293918720, 19876, 0, 4293918720, 19876, 0, 4293918720, 19876, 0, 4293918720, 19876, 0, 4293918720, 19876, 0, 4293918720, 19876, 0, 4293918720, 19876, 0, 4293918720, 19876, 0, 4293918720, 19876, 0, 4293918720, 19880, 0, 4293918720, 19880, 0, 4293918720, 19880, 0, 4293918720, 19880, 0, 4293918720, 19880, 0, 4293918720, 19880, 0, 4293918720, 19880, 0, 4293918720, 19880, 0, 4293918720, 19880, 0, 4293918720, 19880, 0, 4293918720, 19880, 0, 4293918720, 19880, 0, 4293918720, 19884, 0, 4293918720, 19884, 0, 4293918720, 19884, 0, 4293918720, 19884, 0, 4293918720, 19884, 0, 4293918720, 19884, 0, 4293918720, 19884, 0, 4293918720, 19884, 0, 4293918720, 19884, 0, 4293918720, 19884, 0, 4293918720, 19884, 0, 4293918720, 19884, 0, 4293918720, 19892, 0, 4293918720, 19892, 0, 4293918720, 19892, 0, 4293918720, 19892, 0, 4293918720, 19892, 0, 4293918720, 19892, 0, 4293918720, 19892, 0, 4293918720, 19892, 0, 
4293918720, 19892, 0, 4293918720, 19892, 0, 4293918720, 19892, 0, 4293918720, 19892, 0, 4293918720, 19896, 0, 4293918720, 19896, 0, 4293918720, 19896, 0, 4293918720, 19896, 0, 4293918720, 19896, 0, 4293918720, 19896, 0, 4293918720, 19896, 0, 4293918720, 19896, 0, 4293918720, 19896, 0, 4293918720, 19896, 0, 4293918720, 19896, 0, 4293918720, 19896, 0, 4293918720, 19900, 0, 4293918720, 19900, 0, 4293918720, 19900, 0, 4293918720, 19900, 0, 4293918720, 19900, 0, 4293918720, 19900, 0, 4293918720, 19900, 0, 4293918720, 19900, 0, 4293918720, 19900, 0, 4293918720, 19900, 0, 4293918720, 19900, 0, 4293918720, 19900, 0, 4293918720] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471954179639156_999_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471954179639156_999_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..530641f0 --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471954179639156_999_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,306 @@ +#--- source.hlsl +RWStructuredBuffer _participant_bit : register(u0); +RWStructuredBuffer _wave_op_index : register(u1); + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + uint result = 0; + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + uint counter1 = 0; + while ((counter1 < 3)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 1) || (WaveGetLaneIndex() >= 63))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((29 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 16) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveMax(WaveGetLaneIndex())); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((40 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + if ((counter0 == 1)) { + break; + } + } + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = 
(52 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if ((WaveGetLaneIndex() < 6)) { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (64 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + switch ((WaveGetLaneIndex() % 3)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (74 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (83 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (88 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (92 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((WaveGetLaneIndex() >= 44)) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (99 << 
6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } else { + switch ((WaveGetLaneIndex() % 2)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (109 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if (((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (118 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + default: { + result = (result + WaveActiveSum(99)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (122 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + break; + } + } + if ((((((WaveGetLaneIndex() == 6) || (WaveGetLaneIndex() == 24)) || (WaveGetLaneIndex() == 38)) || (WaveGetLaneIndex() == 62)) || (WaveGetLaneIndex() == 59))) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (145 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + break; + } + } + switch ((WaveGetLaneIndex() % 4)) { + case 0: { + if ((WaveGetLaneIndex() < 8)) { + result = (result + WaveActiveSum(1)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (155 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 1: { + if 
(((WaveGetLaneIndex() % 2) == 0)) { + result = (result + WaveActiveSum(2)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (164 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + case 2: { + if (true) { + result = (result + WaveActiveSum(3)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (169 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + case 3: { + if ((WaveGetLaneIndex() < 20)) { + result = (result + WaveActiveSum(4)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (176 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + break; + } + } + if ((((WaveGetLaneIndex() == 18) || (WaveGetLaneIndex() == 34)) || (WaveGetLaneIndex() == 39))) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 1))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (238 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() < 15) || (WaveGetLaneIndex() >= 59))) { + result = (result + WaveActiveMin((WaveGetLaneIndex() + 2))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (232 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 19)) || (WaveGetLaneIndex() == 44)) || (WaveGetLaneIndex() == 53))) { + result = (result + WaveActiveMax((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + 
_participant_bit[temp] = (226 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } else { + if (((WaveGetLaneIndex() & 1) == 0)) { + result = (result + WaveActiveSum((WaveGetLaneIndex() + 4))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (220 << 6); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 501 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [1876, 1, 2147483648, 1876, 1, 2147483648, 1880, 1, 2147483648, 1880, 1, 2147483648, 1884, 1, 2147483648, 1884, 1, 2147483648, 2580, 37449, 2454265856, 2580, 37449, 2454265856, 2580, 37449, 2454265856, 2580, 37449, 2454265856, 2580, 37449, 2454265856, 2580, 37449, 2454265856, 2580, 37449, 2454265856, 2580, 37449, 2454265856, 2580, 37449, 2454265856, 2580, 37449, 2454265856, 2580, 37449, 2454265856, 2580, 37449, 2454265856, 2580, 37449, 2454265856, 2584, 37449, 2454265856, 2584, 37449, 2454265856, 2584, 37449, 2454265856, 2584, 37449, 2454265856, 2584, 37449, 2454265856, 2584, 37449, 2454265856, 2584, 37449, 2454265856, 2584, 37449, 2454265856, 2584, 37449, 2454265856, 2584, 37449, 2454265856, 2584, 37449, 2454265856, 2584, 37449, 2454265856, 2584, 37449, 2454265856, 2588, 37449, 2454265856, 2588, 37449, 2454265856, 2588, 37449, 2454265856, 2588, 37449, 2454265856, 2588, 37449, 2454265856, 2588, 37449, 2454265856, 2588, 37449, 2454265856, 2588, 37449, 2454265856, 2588, 37449, 2454265856, 2588, 37449, 2454265856, 2588, 37449, 2454265856, 2588, 37449, 2454265856, 2588, 37449, 2454265856, 3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 
3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 3328, 1363481681, 340870420, 4096, 63, 0, 4096, 63, 0, 4096, 63, 0, 4096, 63, 0, 4096, 63, 0, 4096, 63, 0, 4736, 9, 0, 4736, 9, 0, 5312, 16, 0, 5632, 36, 0, 5632, 36, 0, 6976, 64, 0, 9280, 16777280, 1207959616, 9280, 16777280, 1207959616, 9280, 16777280, 1207959616, 9280, 16777280, 1207959616, 9280, 16777280, 1207959616, 9920, 17, 0, 9920, 17, 0, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 10816, 1717986918, 1717986918, 11264, 559240, 0, 11264, 559240, 0, 11264, 559240, 0, 11264, 559240, 0, 11264, 559240, 0, 15232, 262144, 132, 15232, 262144, 132, 15232, 262144, 132, 14848, 32767, 
4160749568, 14848, 32767, 4160749568, 14848, 32767, 4160749568, 14848, 32767, 4160749568, 14848, 32767, 4160749568, 14848, 32767, 4160749568, 14848, 32767, 4160749568, 14848, 32767, 4160749568, 14848, 32767, 4160749568, 14848, 32767, 4160749568, 14848, 32767, 4160749568, 14848, 32767, 4160749568, 14848, 32767, 4160749568, 14848, 32767, 4160749568, 14848, 32767, 4160749568, 14848, 32767, 4160749568, 14848, 32767, 4160749568, 14848, 32767, 4160749568, 14848, 32767, 4160749568, 14848, 32767, 4160749568, 14464, 524288, 2101248, 14464, 524288, 2101248, 14464, 524288, 2101248, 14080, 1431371776, 89474385, 14080, 1431371776, 89474385, 14080, 1431371776, 89474385, 14080, 1431371776, 89474385, 14080, 1431371776, 89474385, 14080, 1431371776, 89474385, 14080, 1431371776, 89474385, 14080, 1431371776, 89474385, 14080, 1431371776, 89474385, 14080, 1431371776, 89474385, 14080, 1431371776, 89474385, 14080, 1431371776, 89474385, 14080, 1431371776, 89474385, 14080, 1431371776, 89474385, 14080, 1431371776, 89474385, 14080, 1431371776, 89474385, 14080, 1431371776, 89474385, 14080, 1431371776, 89474385, 14080, 1431371776, 89474385] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... 
+#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/WaveSize64BitTracking/tests/program_1756471981721029703_1000_increment_0_WaveParticipantBitTracking.test b/test/WaveSize64BitTracking/tests/program_1756471981721029703_1000_increment_0_WaveParticipantBitTracking.test new file mode 100644 index 00000000..096a782a --- /dev/null +++ b/test/WaveSize64BitTracking/tests/program_1756471981721029703_1000_increment_0_WaveParticipantBitTracking.test @@ -0,0 +1,115 @@ +#--- source.hlsl +RWStructuredBuffer<uint> _participant_bit : register(u0); /* output records of 3 uints each: id word, ballot.x, ballot.y; uint matches the UInt32 format in pipeline.yaml */ +RWStructuredBuffer<uint> _wave_op_index : register(u1); /* element 0 is the running write offset into _participant_bit, advanced by 3 per record */ + +[numthreads(64, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { /* Runs wave ops under lane-dependent branches; after each op every lane that executed it reserves 3 slots with InterlockedAdd and stores ((wave-op id << 6) | loop counter bits), then ballot.x and ballot.y of WaveActiveBallot(1). */ + uint result = 0; + uint counter0 = 0; + while ((counter0 < 2)) { + counter0 = (counter0 + 1); + if ((WaveGetLaneIndex() >= 54)) { + result = (result + WaveActiveMin(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((13 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() == 28) || (WaveGetLaneIndex() == 57))) { + if (((((WaveGetLaneIndex() == 7) || (WaveGetLaneIndex() == 22)) || (WaveGetLaneIndex() == 40)) || (WaveGetLaneIndex() == 59))) { /* never taken: only lanes 28 and 57 reach this test, so no record with id 41 is written */ + result = (result + WaveActiveSum((WaveGetLaneIndex() + 3))); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((41 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + uint counter1 = 0; + while ((counter1 < 2)) { + counter1 = (counter1 + 1); + if (((WaveGetLaneIndex() < 10) || (WaveGetLaneIndex() >= 51))) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((59 << 6) | (counter0 << 
4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if (((WaveGetLaneIndex() < 8) || (WaveGetLaneIndex() >= 45))) { + result = (result + WaveActiveSum(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = (((70 << 6) | (counter0 << 4)) | (counter1 << 2)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + } + } + if ((WaveGetLaneIndex() >= 38)) { + result = (result + WaveActiveMax(result)); + uint temp = 0; + InterlockedAdd(_wave_op_index[0], 3, temp); + _participant_bit[temp] = ((77 << 6) | (counter0 << 4)); + uint4 ballot = WaveActiveBallot(1); + _participant_bit[(temp + 1)] = ballot.x; + _participant_bit[(temp + 2)] = ballot.y; + } + if ((counter0 == 1)) { /* counter0 is 1 on the first pass, so the outer loop body executes exactly once */ + break; + } + } +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] # Single dispatch for 64 threads +Buffers: + - Name: _participant_bit + Format: UInt32 + Stride: 4 + Fill: 0 + Size: 120 + - Name: expected_bit_patterns + Format: UInt32 + Stride: 4 + Data: [848, 0, 4290772992, 848, 0, 4290772992, 848, 0, 4290772992, 848, 0, 4290772992, 848, 0, 4290772992, 848, 0, 4290772992, 848, 0, 4290772992, 848, 0, 4290772992, 848, 0, 4290772992, 848, 0, 4290772992, 3796, 0, 33554432, 3800, 0, 33554432, 4500, 0, 33554432, 4504, 0, 33554432, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 4294967232, 4944, 0, 
4294967232, 4944, 0, 4294967232] + - Name: _wave_op_index + Format: UInt32 + Stride: 4 + Data: [0] +Results: + - Result: BitTrackingValidation + Rule: BufferParticipantPattern + GroupSize: 3 + Actual: _participant_bit + Expected: expected_bit_patterns +DescriptorSets: + - Resources: + - Name: _participant_bit + Kind: RWStructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: _wave_op_index + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o